From 5a1652452910e9e6024dc67a313e4ca517084b91 Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Sun, 16 Oct 2022 14:21:34 +0800
Subject: [PATCH 1/2] =?UTF-8?q?CheckRewriteString=E6=94=B9=E4=B8=BA?=
 =?UTF-8?q?=E7=94=B1Rust=E5=AE=9E=E7=8E=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc                | 88 ++++++++++++++++++++++-----------------
 regex-capi/include/rure.h |  7 +++
 regex-capi/src/rure.rs    | 41 ++++++++++++++++++
 3 files changed, 98 insertions(+), 38 deletions(-)

diff --git a/re2/re2.cc b/re2/re2.cc
index 91d9880..a5c0e3b 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -976,47 +976,59 @@ namespace re2
   bool RE2::CheckRewriteString(const StringPiece &rewrite,
                                std::string *error) const
   {
-    int max_token = -1;
-    for (const char *s = rewrite.data(), *end = s + rewrite.size();
-         s < end; s++)
-    {
-      int c = *s;
-      if (c != '\\')
-      {
-        continue;
-      }
-      if (++s == end)
-      {
-        *error = "Rewrite schema error: '\\' not allowed at end.";
-        return false;
-      }
-      c = *s;
-      if (c == '\\')
-      {
-        continue;
-      }
-      if (!isdigit(c))
-      {
-        *error = "Rewrite schema error: "
-                 "'\\' must be followed by a digit or '\\'.";
-        return false;
-      }
-      int n = (c - '0');
-      if (max_token < n)
-      {
-        max_token = n;
-      }
-    }
-
-    if (max_token > NumberOfCapturingGroups())
-    {
-      // *error = StringPrintf(
-      //     "Rewrite schema requests %d matches, but the regexp only has %d "
-      //     "parenthesized subexpressions.",
-      //     max_token, NumberOfCapturingGroups());
+    // StringPiece is not guaranteed to be NUL-terminated, while the Rust side
+    // reads a C string, so pass an owned, NUL-terminated copy instead of
+    // rewrite.data().
+    const std::string rewrite_str(rewrite.data(), rewrite.size());
+    const int num_caps = NumberOfCapturingGroups();
+    if (!rure_check_rewrite_string(rewrite_str.c_str(), num_caps))
+    {
+      *error = "Rewrite schema error";
       return false;
     }
     return true;
+
+
+    // for (const char *s = rewrite.data(), *end = s + rewrite.size();
+    //      s < end; s++)
+    // {
+    //   int c = *s;
+    //   if (c != '\\')
+    //   {
+    //     continue;
+    //   }
+    //   if (++s == end)
+    //   {
+    //     *error = "Rewrite schema error: '\\' not allowed at end.";
+    //     return false;
+    //   }
+    //   c = *s;
+    //   if (c == '\\')
+    //   {
+    //     continue;
+    //   }
+    //   if (!isdigit(c))
+    //   {
+    //     *error = "Rewrite schema error: "
+    //              "'\\' must be followed by a digit or '\\'.";
+    //     return false;
+    //   }
+    //   int n = (c - '0');
+    //   if (max_token < n)
+    //   {
+    //     max_token = n;
+    //   }
+    // }
+
+    // if (max_token > NumberOfCapturingGroups())
+    // {
+    //   // *error = StringPrintf(
+    //   //     "Rewrite schema requests %d matches, but the regexp only has %d "
+    //   //     "parenthesized subexpressions.",
+    //   //     max_token, NumberOfCapturingGroups());
+    //   return false;
+    // }
+    // return true;
   }
 
   // Returns the maximum submatch needed for the rewrite to be done by Replace().
diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h
index 6719486..c8e2af5 100644
--- a/regex-capi/include/rure.h
+++ b/regex-capi/include/rure.h
@@ -611,6 +611,13 @@ const char *rure_replace_all(rure *re, const uint8_t *haystack, size_t len_h,
 rure *rure_new(const uint8_t *pattern, size_t length);
 bool rure_consume(rure *re, const uint8_t *haystack, size_t length, rure_match *match);
 int rure_max_submatch(const char *rewrite);
+/*
+ * rure_check_rewrite_string returns true if, in the NUL-terminated rewrite
+ * string, every backslash is followed by a digit or another backslash and
+ * no `\N` reference names a capture group greater than cap_num.
+ */
+bool rure_check_rewrite_string(const char *rewrite, int cap_num);
+
 
 #ifdef __cplusplus
 }
diff --git a/regex-capi/src/rure.rs b/regex-capi/src/rure.rs
index b88ffc4..0636326 100644
--- a/regex-capi/src/rure.rs
+++ b/regex-capi/src/rure.rs
@@ -747,3 +747,44 @@ ffi_fn! {
         max
     }
 }
+
+// rure_check_rewrite_string reports whether `rewrite` is a well-formed
+// rewrite template for a regex with `cap_num` capture groups: every `\` must
+// be followed by another `\` or by a single ASCII digit, and no `\N`
+// reference may name a group greater than `cap_num`. A null pointer is
+// rejected. `rewrite` must point to a NUL-terminated string.
+ffi_fn! {
+    fn rure_check_rewrite_string(rewrite: *const c_char, cap_num: i32) -> bool {
+        if rewrite.is_null() {
+            return false;
+        }
+        // Scan raw bytes instead of decoding UTF-8: only the ASCII bytes `\`
+        // and `0`-`9` are significant, so the result is the same for valid
+        // UTF-8, and a non-UTF-8 rewrite string can no longer panic here.
+        let bytes = unsafe { CStr::from_ptr(rewrite) }.to_bytes();
+        let mut max_token: i32 = -1;
+        let mut i = 0;
+        while i < bytes.len() {
+            if bytes[i] != b'\\' {
+                i += 1;
+                continue;
+            }
+            i += 1;
+            if i == bytes.len() {
+                // A trailing `\` has nothing to escape.
+                return false;
+            }
+            let escaped = bytes[i];
+            i += 1;
+            if escaped == b'\\' {
+                continue;
+            }
+            if !escaped.is_ascii_digit() {
+                // `\` must be followed by a digit or another `\`.
+                return false;
+            }
+            max_token = max_token.max(i32::from(escaped - b'0'));
+        }
+        max_token <= cap_num
+    }
+}
-- 
Gitee

From 8b9e96b27e2722ae1b2602c6e2622f75121b072d Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Sun, 16 Oct 2022 14:24:42 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E5=88=A0=E9=99=A4CheckRewriteString?=
 =?UTF-8?q?=E7=9A=84=E6=97=A0=E5=85=B3=E6=B3=A8=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc | 41 -----------------------------------------
 1 file changed, 41 deletions(-)

diff --git a/re2/re2.cc b/re2/re2.cc
index a5c0e3b..3285dc0 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -988,47 +988,6 @@ namespace re2
     }
     return true;
 
-
-    // for (const char *s = rewrite.data(), *end = s + rewrite.size();
-    //      s < end; s++)
-    // {
-    //   int c = *s;
-    //   if (c != '\\')
-    //   {
-    //     continue;
-    //   }
-    //   if (++s == end)
-    //   {
-    //     *error = "Rewrite schema error: '\\' not allowed at end.";
-    //     return false;
-    //   }
-    //   c = *s;
-    //   if (c == '\\')
-    //   {
-    //     continue;
-    //   }
-    //   if (!isdigit(c))
-    //   {
-    //     *error = "Rewrite schema error: "
-    //              "'\\' must be followed by a digit or '\\'.";
-    //     return false;
-    //   }
-    //   int n = (c - '0');
-    //   if (max_token < n)
-    //   {
-    //     max_token = n;
-    //   }
-    // }
-
-    // if (max_token > NumberOfCapturingGroups())
-    // {
-    //   // *error = StringPrintf(
-    //   //     "Rewrite schema requests %d matches, but the regexp only has %d "
-    //   //     "parenthesized subexpressions.",
-    //   //     max_token, NumberOfCapturingGroups());
-    //   return false;
-    // }
-    // return true;
   }
 
   // Returns the maximum submatch needed for the rewrite to be done by Replace().
-- 
Gitee