File Lib/sre_compile.py changed (mode: 100644) (index d7ee4e8cb6..db8b8a2778) |
... |
... |
def _compile(code, pattern, flags): |
69 |
69 |
REPEATING_CODES = _REPEATING_CODES |
REPEATING_CODES = _REPEATING_CODES |
70 |
70 |
SUCCESS_CODES = _SUCCESS_CODES |
SUCCESS_CODES = _SUCCESS_CODES |
71 |
71 |
ASSERT_CODES = _ASSERT_CODES |
ASSERT_CODES = _ASSERT_CODES |
72 |
|
if (flags & SRE_FLAG_IGNORECASE and |
|
73 |
|
not (flags & SRE_FLAG_LOCALE) and |
|
74 |
|
flags & SRE_FLAG_UNICODE and |
|
75 |
|
not (flags & SRE_FLAG_ASCII)): |
|
76 |
|
fixes = _ignorecase_fixes |
|
77 |
|
else: |
|
78 |
|
fixes = None |
|
|
72 |
|
tolower = None |
|
73 |
|
fixes = None |
|
74 |
|
if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE: |
|
75 |
|
if flags & SRE_FLAG_UNICODE and not flags & SRE_FLAG_ASCII: |
|
76 |
|
tolower = _sre.unicode_tolower |
|
77 |
|
fixes = _ignorecase_fixes |
|
78 |
|
else: |
|
79 |
|
tolower = _sre.ascii_tolower |
79 |
80 |
for op, av in pattern: |
for op, av in pattern: |
80 |
81 |
if op in LITERAL_CODES: |
if op in LITERAL_CODES: |
81 |
82 |
if not flags & SRE_FLAG_IGNORECASE: |
if not flags & SRE_FLAG_IGNORECASE: |
|
... |
... |
def _compile(code, pattern, flags): |
85 |
86 |
emit(OP_LOC_IGNORE[op]) |
emit(OP_LOC_IGNORE[op]) |
86 |
87 |
emit(av) |
emit(av) |
87 |
88 |
else: |
else: |
88 |
|
lo = _sre.getlower(av, flags) |
|
|
89 |
|
lo = tolower(av) |
89 |
90 |
if fixes and lo in fixes: |
if fixes and lo in fixes: |
90 |
91 |
emit(IN_IGNORE) |
emit(IN_IGNORE) |
91 |
92 |
skip = _len(code); emit(0) |
skip = _len(code); emit(0) |
|
... |
... |
def _compile(code, pattern, flags): |
102 |
103 |
elif op is IN: |
elif op is IN: |
103 |
104 |
if not flags & SRE_FLAG_IGNORECASE: |
if not flags & SRE_FLAG_IGNORECASE: |
104 |
105 |
emit(op) |
emit(op) |
105 |
|
fixup = None |
|
106 |
106 |
elif flags & SRE_FLAG_LOCALE: |
elif flags & SRE_FLAG_LOCALE: |
107 |
107 |
emit(IN_LOC_IGNORE) |
emit(IN_LOC_IGNORE) |
108 |
|
fixup = None |
|
109 |
108 |
else: |
else: |
110 |
109 |
emit(IN_IGNORE) |
emit(IN_IGNORE) |
111 |
|
def fixup(literal, flags=flags): |
|
112 |
|
return _sre.getlower(literal, flags) |
|
113 |
110 |
skip = _len(code); emit(0) |
skip = _len(code); emit(0) |
114 |
|
_compile_charset(av, flags, code, fixup, fixes) |
|
|
111 |
|
_compile_charset(av, flags, code, tolower, fixes) |
115 |
112 |
code[skip] = _len(code) - skip |
code[skip] = _len(code) - skip |
116 |
113 |
elif op is ANY: |
elif op is ANY: |
117 |
114 |
if flags & SRE_FLAG_DOTALL: |
if flags & SRE_FLAG_DOTALL: |
File Lib/test/test_re.py changed (mode: 100644) (index 7601dc88c7..b5b7cff9a2) |
... |
... |
class ReTests(unittest.TestCase): |
883 |
883 |
def test_category(self): |
def test_category(self): |
884 |
884 |
self.assertEqual(re.match(r"(\s)", " ").group(1), " ") |
self.assertEqual(re.match(r"(\s)", " ").group(1), " ") |
885 |
885 |
|
|
886 |
|
def test_getlower(self): |
|
|
886 |
|
@cpython_only |
|
887 |
|
def test_case_helpers(self): |
887 |
888 |
import _sre |
import _sre |
888 |
|
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) |
|
889 |
|
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) |
|
890 |
|
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) |
|
891 |
|
self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a')) |
|
892 |
|
|
|
893 |
|
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") |
|
894 |
|
self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") |
|
895 |
|
self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC") |
|
896 |
|
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC") |
|
|
889 |
|
for i in range(128): |
|
890 |
|
c = chr(i) |
|
891 |
|
lo = ord(c.lower()) |
|
892 |
|
self.assertEqual(_sre.ascii_tolower(i), lo) |
|
893 |
|
self.assertEqual(_sre.unicode_tolower(i), lo) |
|
894 |
|
|
|
895 |
|
for i in list(range(128, 0x1000)) + [0x10400, 0x10428]: |
|
896 |
|
c = chr(i) |
|
897 |
|
self.assertEqual(_sre.ascii_tolower(i), i) |
|
898 |
|
if i != 0x0130: |
|
899 |
|
self.assertEqual(_sre.unicode_tolower(i), ord(c.lower())) |
|
900 |
|
|
|
901 |
|
self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130) |
|
902 |
|
self.assertEqual(_sre.unicode_tolower(0x0130), ord('i')) |
897 |
903 |
|
|
898 |
904 |
def test_not_literal(self): |
def test_not_literal(self): |
899 |
905 |
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b") |
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b") |
File Modules/_sre.c changed (mode: 100644) (index afb2bce77b..a86c5f252b) |
... |
... |
_sre_getcodesize_impl(PyObject *module) |
274 |
274 |
} |
} |
275 |
275 |
|
|
276 |
276 |
/*[clinic input] |
/*[clinic input] |
277 |
|
_sre.getlower -> int |
|
|
277 |
|
_sre.ascii_tolower -> int |
278 |
278 |
|
|
279 |
279 |
character: int |
character: int |
280 |
|
flags: int |
|
281 |
280 |
/ |
/ |
282 |
281 |
|
|
283 |
282 |
[clinic start generated code]*/ |
[clinic start generated code]*/ |
284 |
283 |
|
|
285 |
284 |
static int |
static int |
286 |
|
_sre_getlower_impl(PyObject *module, int character, int flags) |
|
287 |
|
/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/ |
|
|
285 |
|
_sre_ascii_tolower_impl(PyObject *module, int character) |
|
286 |
|
/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ |
288 |
287 |
{ |
{ |
289 |
|
if (flags & SRE_FLAG_LOCALE) |
|
290 |
|
return sre_lower_locale(character); |
|
291 |
|
if (flags & SRE_FLAG_UNICODE) |
|
292 |
|
return sre_lower_unicode(character); |
|
293 |
288 |
return sre_lower(character); |
return sre_lower(character); |
294 |
289 |
} |
} |
295 |
290 |
|
|
|
291 |
|
/*[clinic input] |
|
292 |
|
_sre.unicode_tolower -> int |
|
293 |
|
|
|
294 |
|
character: int |
|
295 |
|
/ |
|
296 |
|
|
|
297 |
|
[clinic start generated code]*/ |
|
298 |
|
|
|
299 |
|
static int |
|
300 |
|
_sre_unicode_tolower_impl(PyObject *module, int character) |
|
301 |
|
/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ |
|
302 |
|
{ |
|
303 |
|
return sre_lower_unicode(character); |
|
304 |
|
} |
|
305 |
|
|
296 |
306 |
LOCAL(void) |
LOCAL(void) |
297 |
307 |
state_reset(SRE_STATE* state) |
state_reset(SRE_STATE* state) |
298 |
308 |
{ |
{ |
|
... |
... |
static PyTypeObject Scanner_Type = { |
2740 |
2750 |
static PyMethodDef _functions[] = { |
static PyMethodDef _functions[] = { |
2741 |
2751 |
_SRE_COMPILE_METHODDEF |
_SRE_COMPILE_METHODDEF |
2742 |
2752 |
_SRE_GETCODESIZE_METHODDEF |
_SRE_GETCODESIZE_METHODDEF |
2743 |
|
_SRE_GETLOWER_METHODDEF |
|
|
2753 |
|
_SRE_ASCII_TOLOWER_METHODDEF |
|
2754 |
|
_SRE_UNICODE_TOLOWER_METHODDEF |
2744 |
2755 |
{NULL, NULL} |
{NULL, NULL} |
2745 |
2756 |
}; |
}; |
2746 |
2757 |
|
|
File Modules/clinic/_sre.c.h changed (mode: 100644) (index 5278323f31..8056eda3b7) |
... |
... |
exit: |
29 |
29 |
return return_value; |
return return_value; |
30 |
30 |
} |
} |
31 |
31 |
|
|
32 |
|
PyDoc_STRVAR(_sre_getlower__doc__, |
|
33 |
|
"getlower($module, character, flags, /)\n" |
|
|
32 |
|
PyDoc_STRVAR(_sre_ascii_tolower__doc__, |
|
33 |
|
"ascii_tolower($module, character, /)\n" |
34 |
34 |
"--\n" |
"--\n" |
35 |
35 |
"\n"); |
"\n"); |
36 |
36 |
|
|
37 |
|
#define _SRE_GETLOWER_METHODDEF \ |
|
38 |
|
{"getlower", (PyCFunction)_sre_getlower, METH_FASTCALL, _sre_getlower__doc__}, |
|
|
37 |
|
#define _SRE_ASCII_TOLOWER_METHODDEF \ |
|
38 |
|
{"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__}, |
39 |
39 |
|
|
40 |
40 |
static int |
static int |
41 |
|
_sre_getlower_impl(PyObject *module, int character, int flags); |
|
|
41 |
|
_sre_ascii_tolower_impl(PyObject *module, int character); |
42 |
42 |
|
|
43 |
43 |
static PyObject * |
static PyObject * |
44 |
|
_sre_getlower(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames) |
|
|
44 |
|
_sre_ascii_tolower(PyObject *module, PyObject *arg) |
45 |
45 |
{ |
{ |
46 |
46 |
PyObject *return_value = NULL; |
PyObject *return_value = NULL; |
47 |
47 |
int character; |
int character; |
48 |
|
int flags; |
|
49 |
48 |
int _return_value; |
int _return_value; |
50 |
49 |
|
|
51 |
|
if (!_PyArg_ParseStack(args, nargs, "ii:getlower", |
|
52 |
|
&character, &flags)) { |
|
|
50 |
|
if (!PyArg_Parse(arg, "i:ascii_tolower", &character)) { |
|
51 |
|
goto exit; |
|
52 |
|
} |
|
53 |
|
_return_value = _sre_ascii_tolower_impl(module, character); |
|
54 |
|
if ((_return_value == -1) && PyErr_Occurred()) { |
53 |
55 |
goto exit; |
goto exit; |
54 |
56 |
} |
} |
|
57 |
|
return_value = PyLong_FromLong((long)_return_value); |
|
58 |
|
|
|
59 |
|
exit: |
|
60 |
|
return return_value; |
|
61 |
|
} |
|
62 |
|
|
|
63 |
|
PyDoc_STRVAR(_sre_unicode_tolower__doc__, |
|
64 |
|
"unicode_tolower($module, character, /)\n" |
|
65 |
|
"--\n" |
|
66 |
|
"\n"); |
|
67 |
|
|
|
68 |
|
#define _SRE_UNICODE_TOLOWER_METHODDEF \ |
|
69 |
|
{"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__}, |
|
70 |
|
|
|
71 |
|
static int |
|
72 |
|
_sre_unicode_tolower_impl(PyObject *module, int character); |
|
73 |
|
|
|
74 |
|
static PyObject * |
|
75 |
|
_sre_unicode_tolower(PyObject *module, PyObject *arg) |
|
76 |
|
{ |
|
77 |
|
PyObject *return_value = NULL; |
|
78 |
|
int character; |
|
79 |
|
int _return_value; |
55 |
80 |
|
|
56 |
|
if (!_PyArg_NoStackKeywords("getlower", kwnames)) { |
|
|
81 |
|
if (!PyArg_Parse(arg, "i:unicode_tolower", &character)) { |
57 |
82 |
goto exit; |
goto exit; |
58 |
83 |
} |
} |
59 |
|
_return_value = _sre_getlower_impl(module, character, flags); |
|
|
84 |
|
_return_value = _sre_unicode_tolower_impl(module, character); |
60 |
85 |
if ((_return_value == -1) && PyErr_Occurred()) { |
if ((_return_value == -1) && PyErr_Occurred()) { |
61 |
86 |
goto exit; |
goto exit; |
62 |
87 |
} |
} |
|
... |
... |
_sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored)) |
690 |
715 |
{ |
{ |
691 |
716 |
return _sre_SRE_Scanner_search_impl(self); |
return _sre_SRE_Scanner_search_impl(self); |
692 |
717 |
} |
} |
693 |
|
/*[clinic end generated code: output=e6dab3ba8864da9e input=a9049054013a1b77]*/ |
|
|
718 |
|
/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/ |