From ab43b394ac2044fb6d3f7d3d0cde552e9fc5ecbe Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Fri, 28 Oct 2022 01:09:02 +0800
Subject: [PATCH] =?UTF-8?q?re2.cc=E4=B8=AD=E7=9A=84Rewrite=E4=BD=BF?=
 =?UTF-8?q?=E7=94=A8Rust=E5=AE=9E=E7=8E=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 re2/re2.cc                | 71 +++++++++++++-----------------
 regex-capi/include/rure.h | 17 ++++++++
 regex-capi/src/rure.rs    | 73 ++++++++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 38 deletions(-)

diff --git a/re2/re2.cc b/re2/re2.cc
index 524d47c..e36c494 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -734,44 +734,39 @@ namespace re2
                       const StringPiece *vec,
                       int veclen) const
     {
-        for (const char *s = rewrite.data(), *end = s + rewrite.size();
-             s < end; s++)
-        {
-            if (*s != '\\')
-            {
-                out->push_back(*s);
-                continue;
-            }
-            s++;
-            int c = (s < end) ? *s : -1;
-            if (isdigit(c))
-            {
-                int n = (c - '0');
-                if (n >= veclen)
-                {
-                    if (options_.log_errors())
-                    {
-                        LOG(ERROR) << "invalid substitution \\" << n
-                                   << " from " << veclen << " groups";
-                    }
-                    return false;
-                }
-                StringPiece snip = vec[n];
-                if (!snip.empty())
-                    out->append(snip.data(), snip.size());
-            }
-            else if (c == '\\')
-            {
-                out->push_back('\\');
-            }
-            else
-            {
-                if (options_.log_errors())
-                    LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
-                return false;
-            }
-        }
-        return true;
+        // A rewrite string can only name groups \0 through \9, so only the
+        // first ten entries of vec are ever reachable. Copying just those into
+        // fixed-size arrays avoids a variable-length array (not standard C++)
+        // and keeps rure_rewrite's "group index >= count" check equivalent to
+        // the "n >= veclen" check of the code it replaces.
+        const int kMaxRewriteGroups = 10;
+        int ngroups = veclen < kMaxRewriteGroups ? veclen : kMaxRewriteGroups;
+        if (ngroups < 0)
+            ngroups = 0;
+        const uint8_t *groups[kMaxRewriteGroups];
+        size_t group_sizes[kMaxRewriteGroups];
+        for (int i = 0; i < ngroups; i++)
+        {
+            groups[i] = reinterpret_cast<const uint8_t *>(vec[i].data());
+            group_sizes[i] = vec[i].size();
+        }
+
+        const char *expanded = rure_rewrite(
+            reinterpret_cast<const uint8_t *>(rewrite.data()), rewrite.size(),
+            groups, group_sizes, static_cast<size_t>(ngroups));
+        if (expanded == NULL)
+        {
+            if (options_.log_errors())
+                LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
+            return false;
+        }
+
+        // The code being replaced only ever appended to *out, so append here
+        // too instead of overwriting whatever the caller already accumulated.
+        out->append(expanded);
+        // rure_rewrite transfers ownership of the string; release it.
+        rure_cstring_free(const_cast<char *>(expanded));
+        return true;
     }

     /***** Parsers for various types *****/
diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h
index 32443b5..31d719a 100644
--- a/regex-capi/include/rure.h
+++ b/regex-capi/include/rure.h
@@ -618,6 +618,23 @@ bool rure_check_rewrite_string(const char *rewrite, int max_token);
  */
 const char *rure_rewrite_str_convert(const uint8_t *rewrite, size_t len);

+/*
+ * rure_rewrite expands a RE2-style rewrite string, mirroring RE2::Rewrite.
+ *
+ * rewrite/len is the rewrite pattern. A backslash followed by a single
+ * digit N (0-9) is replaced by the bytes of vecs[N], whose length is
+ * vecs_lengths[N]; two consecutive backslashes produce one backslash; every
+ * other byte is copied unchanged. vecs and vecs_lengths must each hold
+ * vecs_count entries. An entry of vecs may be NULL if its length is 0.
+ *
+ * On success a NUL-terminated string is returned. The caller owns it and
+ * must release it with rure_cstring_free. NULL is returned if the pattern
+ * ends with a lone backslash, contains any other escape, names a group
+ * N >= vecs_count, or if the expansion would contain a NUL byte.
+ */
+const char *rure_rewrite(const uint8_t *rewrite, size_t len, const uint8_t **vecs,
+                         const size_t *vecs_lengths, size_t vecs_count);
+
 #ifdef __cplusplus
 }
 #endif
\ No newline at end of file
diff --git a/regex-capi/src/rure.rs b/regex-capi/src/rure.rs
index a101770..eb17613 100644
--- a/regex-capi/src/rure.rs
+++ b/regex-capi/src/rure.rs
@@ -833,4 +833,77 @@ ffi_fn! {
         };
         rure_str.into_raw() as *const c_char
     }
+}
+
+ffi_fn! {
+    fn rure_rewrite(
+        rewrite: *const u8,
+        length: size_t,
+        vecs: *const *const u8,
+        vecs_lengths: *const size_t,
+        vecs_count: size_t
+    ) -> *const c_char {
+        // Expands a RE2-style rewrite pattern. A backslash followed by one
+        // ASCII digit N is replaced by capture group N, a doubled backslash by
+        // a single backslash, and every other byte is copied through.
+        //
+        // Returns a NUL-terminated string that the caller must release with
+        // rure_cstring_free, or NULL when the pattern is malformed, names a
+        // group >= vecs_count, or the result would contain a NUL byte.
+        //
+        // Everything is processed as raw bytes, so neither the pattern nor the
+        // groups have to be valid UTF-8, and bad input yields NULL rather than
+        // a panic.
+
+        // slice::from_raw_parts requires a non-null pointer even for length 0,
+        // so null or empty inputs are mapped to empty slices up front.
+        let pattern: &[u8] = if rewrite.is_null() || length == 0 {
+            &[]
+        } else {
+            unsafe { slice::from_raw_parts(rewrite, length) }
+        };
+        let have_groups = !vecs.is_null() && !vecs_lengths.is_null() && vecs_count > 0;
+        let groups: &[*const u8] = if have_groups {
+            unsafe { slice::from_raw_parts(vecs, vecs_count) }
+        } else {
+            &[]
+        };
+        let group_lens: &[size_t] = if have_groups {
+            unsafe { slice::from_raw_parts(vecs_lengths, vecs_count) }
+        } else {
+            &[]
+        };
+
+        let mut out: Vec<u8> = Vec::with_capacity(pattern.len());
+        let mut bytes = pattern.iter().copied();
+        while let Some(byte) = bytes.next() {
+            if byte != b'\\' {
+                out.push(byte);
+                continue;
+            }
+            match bytes.next() {
+                Some(digit) if digit.is_ascii_digit() => {
+                    let n = usize::from(digit - b'0');
+                    if n >= groups.len() {
+                        return ptr::null();
+                    }
+                    // An unmatched group may arrive as a null pointer with
+                    // length 0; it contributes nothing to the output.
+                    let (data, len) = (groups[n], group_lens[n]);
+                    if !data.is_null() && len > 0 {
+                        out.extend_from_slice(unsafe { slice::from_raw_parts(data, len) });
+                    }
+                }
+                Some(b'\\') => out.push(b'\\'),
+                // A lone trailing backslash or any other escape is an error.
+                _ => return ptr::null(),
+            }
+        }
+
+        // CString::new rejects interior NUL bytes; report that as a failure.
+        match CString::new(out) {
+            Ok(expanded) => expanded.into_raw() as *const c_char,
+            Err(_) => ptr::null(),
+        }
+    }
 }
\ No newline at end of file
-- 
Gitee