File Lib/re.py changed (mode: 100644) (index 7053eddbe0..d0ee5db175) |
... |
... |
_MAXCACHE = 512 |
268 |
268 |
def _compile(pattern, flags): |
def _compile(pattern, flags): |
269 |
269 |
# internal: compile pattern |
# internal: compile pattern |
270 |
270 |
try: |
try: |
271 |
|
p, loc = _cache[type(pattern), pattern, flags] |
|
272 |
|
if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): |
|
273 |
|
return p |
|
|
271 |
|
return _cache[type(pattern), pattern, flags] |
274 |
272 |
except KeyError: |
except KeyError: |
275 |
273 |
pass |
pass |
276 |
274 |
if isinstance(pattern, _pattern_type): |
if isinstance(pattern, _pattern_type): |
|
... |
... |
def _compile(pattern, flags): |
284 |
282 |
if not (flags & DEBUG): |
if not (flags & DEBUG): |
285 |
283 |
if len(_cache) >= _MAXCACHE: |
if len(_cache) >= _MAXCACHE: |
286 |
284 |
_cache.clear() |
_cache.clear() |
287 |
|
if p.flags & LOCALE: |
|
288 |
|
if not _locale: |
|
289 |
|
return p |
|
290 |
|
loc = _locale.setlocale(_locale.LC_CTYPE) |
|
291 |
|
else: |
|
292 |
|
loc = None |
|
293 |
|
_cache[type(pattern), pattern, flags] = p, loc |
|
|
285 |
|
_cache[type(pattern), pattern, flags] = p |
294 |
286 |
return p |
return p |
295 |
287 |
|
|
296 |
288 |
@functools.lru_cache(_MAXCACHE) |
@functools.lru_cache(_MAXCACHE) |
File Lib/sre_compile.py changed (mode: 100644) (index 2cc39007ac..d7ee4e8cb6) |
... |
... |
def _compile(code, pattern, flags): |
78 |
78 |
fixes = None |
fixes = None |
79 |
79 |
for op, av in pattern: |
for op, av in pattern: |
80 |
80 |
if op in LITERAL_CODES: |
if op in LITERAL_CODES: |
81 |
|
if flags & SRE_FLAG_IGNORECASE: |
|
|
81 |
|
if not flags & SRE_FLAG_IGNORECASE: |
|
82 |
|
emit(op) |
|
83 |
|
emit(av) |
|
84 |
|
elif flags & SRE_FLAG_LOCALE: |
|
85 |
|
emit(OP_LOC_IGNORE[op]) |
|
86 |
|
emit(av) |
|
87 |
|
else: |
82 |
88 |
lo = _sre.getlower(av, flags) |
lo = _sre.getlower(av, flags) |
83 |
89 |
if fixes and lo in fixes: |
if fixes and lo in fixes: |
84 |
90 |
emit(IN_IGNORE) |
emit(IN_IGNORE) |
|
... |
... |
def _compile(code, pattern, flags): |
93 |
99 |
else: |
else: |
94 |
100 |
emit(OP_IGNORE[op]) |
emit(OP_IGNORE[op]) |
95 |
101 |
emit(lo) |
emit(lo) |
96 |
|
else: |
|
97 |
|
emit(op) |
|
98 |
|
emit(av) |
|
99 |
102 |
elif op is IN: |
elif op is IN: |
100 |
|
if flags & SRE_FLAG_IGNORECASE: |
|
101 |
|
emit(OP_IGNORE[op]) |
|
102 |
|
def fixup(literal, flags=flags): |
|
103 |
|
return _sre.getlower(literal, flags) |
|
104 |
|
else: |
|
|
103 |
|
if not flags & SRE_FLAG_IGNORECASE: |
105 |
104 |
emit(op) |
emit(op) |
106 |
105 |
fixup = None |
fixup = None |
|
106 |
|
elif flags & SRE_FLAG_LOCALE: |
|
107 |
|
emit(IN_LOC_IGNORE) |
|
108 |
|
fixup = None |
|
109 |
|
else: |
|
110 |
|
emit(IN_IGNORE) |
|
111 |
|
def fixup(literal, flags=flags): |
|
112 |
|
return _sre.getlower(literal, flags) |
107 |
113 |
skip = _len(code); emit(0) |
skip = _len(code); emit(0) |
108 |
114 |
_compile_charset(av, flags, code, fixup, fixes) |
_compile_charset(av, flags, code, fixup, fixes) |
109 |
115 |
code[skip] = _len(code) - skip |
code[skip] = _len(code) - skip |
File Lib/test/test_re.py changed (mode: 100644) (index da5c953ced..7601dc88c7) |
... |
... |
SUBPATTERN None 0 0 |
1730 |
1730 |
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) |
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5')) |
1731 |
1731 |
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) |
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5')) |
1732 |
1732 |
|
|
|
1733 |
|
def test_locale_compiled(self): |
|
1734 |
|
oldlocale = locale.setlocale(locale.LC_CTYPE) |
|
1735 |
|
self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) |
|
1736 |
|
for loc in 'en_US.iso88591', 'en_US.utf8': |
|
1737 |
|
try: |
|
1738 |
|
locale.setlocale(locale.LC_CTYPE, loc) |
|
1739 |
|
except locale.Error: |
|
1740 |
|
# Unsupported locale on this system |
|
1741 |
|
self.skipTest('test needs %s locale' % loc) |
|
1742 |
|
|
|
1743 |
|
locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591') |
|
1744 |
|
p1 = re.compile(b'\xc5\xe5', re.L|re.I) |
|
1745 |
|
p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I) |
|
1746 |
|
p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I) |
|
1747 |
|
p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I) |
|
1748 |
|
for p in p1, p2, p3: |
|
1749 |
|
self.assertTrue(p.match(b'\xc5\xe5')) |
|
1750 |
|
self.assertTrue(p.match(b'\xe5\xe5')) |
|
1751 |
|
self.assertTrue(p.match(b'\xc5\xc5')) |
|
1752 |
|
self.assertIsNone(p4.match(b'\xe5\xc5')) |
|
1753 |
|
self.assertIsNone(p4.match(b'\xe5\xe5')) |
|
1754 |
|
self.assertIsNone(p4.match(b'\xc5\xc5')) |
|
1755 |
|
|
|
1756 |
|
locale.setlocale(locale.LC_CTYPE, 'en_US.utf8') |
|
1757 |
|
for p in p1, p2, p3: |
|
1758 |
|
self.assertTrue(p.match(b'\xc5\xe5')) |
|
1759 |
|
self.assertIsNone(p.match(b'\xe5\xe5')) |
|
1760 |
|
self.assertIsNone(p.match(b'\xc5\xc5')) |
|
1761 |
|
self.assertTrue(p4.match(b'\xe5\xc5')) |
|
1762 |
|
self.assertIsNone(p4.match(b'\xe5\xe5')) |
|
1763 |
|
self.assertIsNone(p4.match(b'\xc5\xc5')) |
|
1764 |
|
|
1733 |
1765 |
def test_error(self): |
def test_error(self): |
1734 |
1766 |
with self.assertRaises(re.error) as cm: |
with self.assertRaises(re.error) as cm: |
1735 |
1767 |
re.compile('(\u20ac))') |
re.compile('(\u20ac))') |
File Modules/_sre.c changed (mode: 100644) (index 03a138ee01..afb2bce77b) |
... |
... |
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) |
1588 |
1588 |
case SRE_OP_NOT_LITERAL: |
case SRE_OP_NOT_LITERAL: |
1589 |
1589 |
case SRE_OP_LITERAL_IGNORE: |
case SRE_OP_LITERAL_IGNORE: |
1590 |
1590 |
case SRE_OP_NOT_LITERAL_IGNORE: |
case SRE_OP_NOT_LITERAL_IGNORE: |
|
1591 |
|
case SRE_OP_LITERAL_LOC_IGNORE: |
|
1592 |
|
case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
1591 |
1593 |
GET_ARG; |
GET_ARG; |
1592 |
1594 |
/* The arg is just a character, nothing to check */ |
/* The arg is just a character, nothing to check */ |
1593 |
1595 |
break; |
break; |
|
... |
... |
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) |
1625 |
1627 |
|
|
1626 |
1628 |
case SRE_OP_IN: |
case SRE_OP_IN: |
1627 |
1629 |
case SRE_OP_IN_IGNORE: |
case SRE_OP_IN_IGNORE: |
|
1630 |
|
case SRE_OP_IN_LOC_IGNORE: |
1628 |
1631 |
GET_SKIP; |
GET_SKIP; |
1629 |
1632 |
/* Stop 1 before the end; we check the FAILURE below */ |
/* Stop 1 before the end; we check the FAILURE below */ |
1630 |
1633 |
if (!_validate_charset(code, code+skip-2)) |
if (!_validate_charset(code, code+skip-2)) |
File Modules/sre_constants.h changed (mode: 100644) (index 6632442efe..6d6d21efd0) |
11 |
11 |
* See the _sre.c file for information on usage and redistribution. |
* See the _sre.c file for information on usage and redistribution. |
12 |
12 |
*/ |
*/ |
13 |
13 |
|
|
14 |
|
#define SRE_MAGIC 20140917 |
|
|
14 |
|
#define SRE_MAGIC 20170530 |
15 |
15 |
#define SRE_OP_FAILURE 0 |
#define SRE_OP_FAILURE 0 |
16 |
16 |
#define SRE_OP_SUCCESS 1 |
#define SRE_OP_SUCCESS 1 |
17 |
17 |
#define SRE_OP_ANY 2 |
#define SRE_OP_ANY 2 |
|
45 |
45 |
#define SRE_OP_SUBPATTERN 30 |
#define SRE_OP_SUBPATTERN 30 |
46 |
46 |
#define SRE_OP_MIN_REPEAT_ONE 31 |
#define SRE_OP_MIN_REPEAT_ONE 31 |
47 |
47 |
#define SRE_OP_RANGE_IGNORE 32 |
#define SRE_OP_RANGE_IGNORE 32 |
|
48 |
|
#define SRE_OP_LITERAL_LOC_IGNORE 33 |
|
49 |
|
#define SRE_OP_NOT_LITERAL_LOC_IGNORE 34 |
|
50 |
|
#define SRE_OP_IN_LOC_IGNORE 35 |
48 |
51 |
#define SRE_AT_BEGINNING 0 |
#define SRE_AT_BEGINNING 0 |
49 |
52 |
#define SRE_AT_BEGINNING_LINE 1 |
#define SRE_AT_BEGINNING_LINE 1 |
50 |
53 |
#define SRE_AT_BEGINNING_STRING 2 |
#define SRE_AT_BEGINNING_STRING 2 |
File Modules/sre_lib.h changed (mode: 100644) (index 0865fc63a0..b540d219dd) |
... |
... |
SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) |
100 |
100 |
return 0; |
return 0; |
101 |
101 |
} |
} |
102 |
102 |
|
|
|
103 |
|
LOCAL(int) |
|
104 |
|
SRE(char_loc_ignore)(SRE_STATE* state, SRE_CODE pattern, SRE_CODE ch) |
|
105 |
|
{ |
|
106 |
|
return ch == pattern |
|
107 |
|
|| (SRE_CODE) state->lower(ch) == pattern |
|
108 |
|
|| (SRE_CODE) state->upper(ch) == pattern; |
|
109 |
|
} |
|
110 |
|
|
103 |
111 |
LOCAL(int) |
LOCAL(int) |
104 |
112 |
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
105 |
113 |
{ |
{ |
|
... |
... |
SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
187 |
195 |
} |
} |
188 |
196 |
} |
} |
189 |
197 |
|
|
|
198 |
|
LOCAL(int) |
|
199 |
|
SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) |
|
200 |
|
{ |
|
201 |
|
SRE_CODE lo, up; |
|
202 |
|
lo = state->lower(ch); |
|
203 |
|
if (SRE(charset)(state, set, lo)) |
|
204 |
|
return 1; |
|
205 |
|
|
|
206 |
|
up = state->upper(ch); |
|
207 |
|
return up != lo && SRE(charset)(state, set, up); |
|
208 |
|
} |
|
209 |
|
|
190 |
210 |
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); |
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); |
191 |
211 |
|
|
192 |
212 |
LOCAL(Py_ssize_t) |
LOCAL(Py_ssize_t) |
|
... |
... |
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) |
247 |
267 |
ptr++; |
ptr++; |
248 |
268 |
break; |
break; |
249 |
269 |
|
|
|
270 |
|
case SRE_OP_LITERAL_LOC_IGNORE: |
|
271 |
|
/* repeated literal */ |
|
272 |
|
chr = pattern[1]; |
|
273 |
|
TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
|
274 |
|
while (ptr < end && SRE(char_loc_ignore)(state, chr, *ptr)) |
|
275 |
|
ptr++; |
|
276 |
|
break; |
|
277 |
|
|
250 |
278 |
case SRE_OP_NOT_LITERAL: |
case SRE_OP_NOT_LITERAL: |
251 |
279 |
/* repeated non-literal */ |
/* repeated non-literal */ |
252 |
280 |
chr = pattern[1]; |
chr = pattern[1]; |
|
... |
... |
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) |
269 |
297 |
ptr++; |
ptr++; |
270 |
298 |
break; |
break; |
271 |
299 |
|
|
|
300 |
|
case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
|
301 |
|
/* repeated non-literal */ |
|
302 |
|
chr = pattern[1]; |
|
303 |
|
TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
|
304 |
|
while (ptr < end && !SRE(char_loc_ignore)(state, chr, *ptr)) |
|
305 |
|
ptr++; |
|
306 |
|
break; |
|
307 |
|
|
272 |
308 |
default: |
default: |
273 |
309 |
/* repeated single character pattern */ |
/* repeated single character pattern */ |
274 |
310 |
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
|
... |
... |
entrance: |
651 |
687 |
TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
652 |
688 |
ctx->pattern, ctx->ptr, ctx->pattern[0])); |
ctx->pattern, ctx->ptr, ctx->pattern[0])); |
653 |
689 |
if (ctx->ptr >= end || |
if (ctx->ptr >= end || |
654 |
|
state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) |
|
|
690 |
|
state->lower(*ctx->ptr) != *ctx->pattern) |
|
691 |
|
RETURN_FAILURE; |
|
692 |
|
ctx->pattern++; |
|
693 |
|
ctx->ptr++; |
|
694 |
|
break; |
|
695 |
|
|
|
696 |
|
case SRE_OP_LITERAL_LOC_IGNORE: |
|
697 |
|
TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", |
|
698 |
|
ctx->pattern, ctx->ptr, ctx->pattern[0])); |
|
699 |
|
if (ctx->ptr >= end |
|
700 |
|
|| !SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr)) |
655 |
701 |
RETURN_FAILURE; |
RETURN_FAILURE; |
656 |
702 |
ctx->pattern++; |
ctx->pattern++; |
657 |
703 |
ctx->ptr++; |
ctx->ptr++; |
|
... |
... |
entrance: |
661 |
707 |
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
662 |
708 |
ctx->pattern, ctx->ptr, *ctx->pattern)); |
ctx->pattern, ctx->ptr, *ctx->pattern)); |
663 |
709 |
if (ctx->ptr >= end || |
if (ctx->ptr >= end || |
664 |
|
state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) |
|
|
710 |
|
state->lower(*ctx->ptr) == *ctx->pattern) |
|
711 |
|
RETURN_FAILURE; |
|
712 |
|
ctx->pattern++; |
|
713 |
|
ctx->ptr++; |
|
714 |
|
break; |
|
715 |
|
|
|
716 |
|
case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
|
717 |
|
TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", |
|
718 |
|
ctx->pattern, ctx->ptr, *ctx->pattern)); |
|
719 |
|
if (ctx->ptr >= end |
|
720 |
|
|| SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr)) |
665 |
721 |
RETURN_FAILURE; |
RETURN_FAILURE; |
666 |
722 |
ctx->pattern++; |
ctx->pattern++; |
667 |
723 |
ctx->ptr++; |
ctx->ptr++; |
|
... |
... |
entrance: |
677 |
733 |
ctx->ptr++; |
ctx->ptr++; |
678 |
734 |
break; |
break; |
679 |
735 |
|
|
|
736 |
|
case SRE_OP_IN_LOC_IGNORE: |
|
737 |
|
TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr)); |
|
738 |
|
if (ctx->ptr >= end |
|
739 |
|
|| !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr)) |
|
740 |
|
RETURN_FAILURE; |
|
741 |
|
ctx->pattern += ctx->pattern[0]; |
|
742 |
|
ctx->ptr++; |
|
743 |
|
break; |
|
744 |
|
|
680 |
745 |
case SRE_OP_JUMP: |
case SRE_OP_JUMP: |
681 |
746 |
case SRE_OP_INFO: |
case SRE_OP_INFO: |
682 |
747 |
/* jump forward */ |
/* jump forward */ |