@@ -3301,6 +3301,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
}
#endif
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_CALL (1<<4)
+#define IN_RECCALL (1<<5)
+#define IN_LOOK_BEHIND (1<<6)
+
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
@@ -3597,24 +3605,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
return ONIGERR_MEMORY;
}
-static int
-expand_case_fold_string(Node* node, regex_t* reg)
-{
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+static int
+expand_case_fold_string(Node* node, regex_t* reg, int state)
+{
int r, n, len, alt_num;
int varlen = 0;
+ int is_in_look_behind;
UChar *start, *end, *p;
Node *top_root, *root, *snode, *prev_node;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- StrNode* sn = NSTR(node);
+ StrNode* sn;
if (NSTRING_IS_AMBIG(node)) return 0;
+ sn = NSTR(node);
+
start = sn->s;
end = sn->end;
if (start >= end) return 0;
+ is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+
r = 0;
top_root = root = prev_node = snode = NULL_NODE;
alt_num = 1;
@@ -3630,7 +3643,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
varlen = is_case_fold_variable_len(n, items, len);
- if (n == 0 || varlen == 0) {
+ if (n == 0 || varlen == 0 || is_in_look_behind) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
onig_node_free(top_root);
@@ -3889,13 +3902,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
#endif
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_CALL (1<<4)
-#define IN_RECCALL (1<<5)
-
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
@@ -3937,7 +3943,7 @@ restart:
case NT_STR:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
+ r = expand_case_fold_string(node, reg, state);
}
break;
@@ -4180,7 +4186,7 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, state, env);
+ r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@@ -4193,7 +4199,8 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
+ env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@@ -112,7 +112,7 @@ describe "Literal Regexps" do
/foo.(?<=\d)/.match("fooA foo1").to_a.should == ["foo1"]
end
- ruby_bug "#13671", ""..."3.6" do # https://bugs.ruby-lang.org/issues/13671
+ ruby_bug "#13671", ""..."3.5" do # https://bugs.ruby-lang.org/issues/13671
it "handles a lookbehind with ss characters" do
r = Regexp.new("(?<!dss)", Regexp::IGNORECASE)
r.should =~ "✨"
@@ -1743,6 +1743,29 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') }
end
+ def test_ss_in_look_behind
+ assert_match_at("(?i:ss)", "ss", [[0, 2]])
+ assert_match_at("(?i:ss)", "Ss", [[0, 2]])
+ assert_match_at("(?i:ss)", "SS", [[0, 2]])
+ assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S
+ assert_match_at("(?i:ss)", "s\u017f", [[0, 2]])
+ assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S
+ assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S
+ assert_match_at("(?i:xssy)", "xssy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xSsy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xSSy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]])
+ assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]])
+ assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]])
+ assert_match_at("(?i:\u00df)", "ss", [[0, 2]])
+ assert_match_at("(?i:\u00df)", "SS", [[0, 2]])
+ assert_match_at("(?i:[\u00df])", "ss", [[0, 2]])
+ assert_match_at("(?i:[\u00df])", "SS", [[0, 2]])
+ assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92
+ assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]])
+ end
+
def test_options_in_look_behind
assert_nothing_raised {
assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]])