From 1fa449721b12340417095117fc0b846b92c46e7e Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Wed, 26 Oct 2022 21:58:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E5=8E=9Fre2.cc=E4=B8=AD=E7=9A=84rewri?= =?UTF-8?q?te=5Fre2=5Fto=5Frure=E5=87=BD=E6=95=B0=E4=BD=BF=E7=94=A8Rust?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=EF=BC=8C=E4=B8=BArure=5Frewrite=5Fstr=5Fconv?= =?UTF-8?q?ert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- re2/re2.cc | 28 +++----------------------- regex-capi/include/rure.h | 5 +++++ regex-capi/src/rure.rs | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 25 deletions(-) diff --git a/re2/re2.cc b/re2/re2.cc index 2949553..524d47c 100644 --- a/re2/re2.cc +++ b/re2/re2.cc @@ -312,29 +312,6 @@ namespace re2 } } - // 处理Rewrite 将所有的 //number 转换为 ${number} - std::string rewrite_re2_to_rure(re2::StringPiece rewrite) - { - std::string rure_rewrite; - for (const char *s = rewrite.data(), *end = s + rewrite.size(); - s < end; s++) - { - if (*s != '\\') - { - rure_rewrite.push_back(*s); - continue; - } - s++; - int c = (s < end) ? 
*s : -1; - if (isdigit(c)) - { - rure_rewrite.append("${"); - rure_rewrite.push_back(c); - rure_rewrite.push_back('}'); - } - } - return rure_rewrite; - } bool RE2::Replace(std::string *str, const RE2 &re, @@ -351,7 +328,8 @@ namespace re2 // 利用rure进行replace const char *rure_str = re.pattern_.c_str(); // 对rewrite进行处理 - const char *rure_rewrite = rewrite_re2_to_rure(rewrite).c_str(); + const char *rure_rewrite = rure_rewrite_str_convert((const uint8_t*)rewrite.data(), rewrite.size()); + rure *re_rure = rure_compile((const uint8_t *)rure_str, strlen(rure_str), RURE_DEFAULT_FLAGS, NULL, NULL); const char *str_rure = rure_replace(re_rure, (const uint8_t *)str->c_str(), strlen(str->c_str()), (const uint8_t *)rure_rewrite, strlen(rure_rewrite)); @@ -397,7 +375,7 @@ namespace re2 if (count != 0) { // 对rewrite进行处理 - const char *rure_rewrite = rewrite_re2_to_rure(rewrite).c_str(); + const char *rure_rewrite = rure_rewrite_str_convert((const uint8_t*)rewrite.data(), rewrite.size()); const char *str_rure = rure_replace_all(re_rure, (const uint8_t *)str->c_str(), strlen(str->c_str()), (const uint8_t *)rure_rewrite, strlen(rure_rewrite)); *str = str_rure; diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h index f9abf43..32443b5 100644 --- a/regex-capi/include/rure.h +++ b/regex-capi/include/rure.h @@ -613,6 +613,11 @@ int rure_max_submatch(const char *rewrite); bool rure_check_rewrite_string(const char *rewrite, int max_token); +/* + * Convert RE2 style rewrite string to a string that Rust can accept +*/ +const char *rure_rewrite_str_convert(const uint8_t *rewrite, size_t len); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/regex-capi/src/rure.rs b/regex-capi/src/rure.rs index 0636326..a101770 100644 --- a/regex-capi/src/rure.rs +++ b/regex-capi/src/rure.rs @@ -793,3 +793,44 @@ ffi_fn! { return true; } } + +ffi_fn! 
{ + fn rure_rewrite_str_convert(rewrite: *const u8, length: size_t) -> *const c_char { + let rewrite = unsafe { slice::from_raw_parts(rewrite, length) }; + let rewrite_str = std::str::from_utf8(rewrite).unwrap(); + let rewrite_chars = rewrite_str.chars().collect::<Vec<char>>(); + let mut i = 0; + let mut rewrite_rure_str = String::new(); + while i < rewrite_chars.len() { + if rewrite_chars[i] != '\\' { + rewrite_rure_str.push(rewrite_chars[i]); + i += 1; + continue; + } + { + i += 1; + let c = { + if i < rewrite_chars.len() { + rewrite_chars[i] + } else { + '#' + } + }; + if c.is_ascii_digit() { + rewrite_rure_str.push_str("${"); + rewrite_rure_str.push(c); + rewrite_rure_str.push('}'); + } + } + i += 1; + } + let rure_str = match CString::new(rewrite_rure_str) { + Ok(val) => val, + Err(err) => { + println!("{}", err); + return ptr::null(); + }, + }; + rure_str.into_raw() as *const c_char + } +} \ No newline at end of file -- Gitee