From 6c098c5d443a8e12860defac4ab62f919f1aa394 Mon Sep 17 00:00:00 2001
From: yangwentong <425822674@qq.com>
Date: Fri, 25 Nov 2022 22:15:39 +0800
Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E5=90=8CC=E7=B1=BB=E5=9E=8B=E8=BD=AC?=
 =?UTF-8?q?=E6=8D=A2=E7=9A=84=E9=80=BB=E8=BE=91=E4=B8=8E=E4=B8=9A=E5=8A=A1?=
 =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E9=80=BB=E8=BE=91=E5=88=86=E7=A6=BB,=20rure?=
 =?UTF-8?q?=E7=9A=84=E6=8E=A5=E5=8F=A3=E4=BB=A3=E7=A0=81=E6=94=BE=E5=9C=A8?=
 =?UTF-8?q?lib.rs=E4=B8=AD,=20rure.rs=E6=94=B9=E4=B8=BAlib=5Finternal.rs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 regex-capi/ctest/test.c                     | 171 ------
 regex-capi/include/rure.h                   | 129 ----
 regex-capi/src/lib.rs                       | 520 +++++++++++++++-
 regex-capi/src/{rure.rs => lib_internal.rs} | 644 ++------------------
 4 files changed, 564 insertions(+), 900 deletions(-)
 rename regex-capi/src/{rure.rs => lib_internal.rs} (51%)

diff --git a/regex-capi/ctest/test.c b/regex-capi/ctest/test.c
index 86bf1dc..a75e23b 100644
--- a/regex-capi/ctest/test.c
+++ b/regex-capi/ctest/test.c
@@ -28,34 +28,6 @@ bool test_is_match() {
     return passed;
 }
 
-bool test_shortest_match() {
-    bool passed = true;
-    const char *haystack = "aaaaa";
-
-    rure *re = rure_compile_must("a+");
-    size_t end = 0;
-    bool matched = rure_shortest_match(re, (const uint8_t *)haystack,
-                                       strlen(haystack), 0, &end);
-    if (!matched) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_shortest_match] expected match, "
-                    "but got no match\n");
-        }
-        passed = false;
-    }
-    size_t expect_end = 1;
-    if (end != expect_end) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_shortest_match] expected match end location %zu "
-                    "but got %zu\n", expect_end, end);
-        }
-        passed = false;
-    }
-    rure_free(re);
-    return passed;
-}
 
 bool test_find() {
     bool passed = true;
@@ -115,19 +87,6 @@ bool test_captures() {
         passed = false;
         goto done;
     }
-    int32_t expect_capture_index = 2;
-    int32_t capture_index = rure_capture_name_index(re, "snowman");
-    if (capture_index != expect_capture_index) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_captures] "
-                    "expected capture index %d for name 'snowman', but "
-                    "got %d\n",
-                    expect_capture_index, capture_index);
-        }
-        passed = false;
-        goto done;
-    }
     size_t expect_start = 9;
     size_t expect_end = 12;
     rure_captures_at(caps, 2, &match);
@@ -147,67 +106,6 @@ done:
     return passed;
 }
 
-bool test_iter() {
-    bool passed = true;
-    const uint8_t *haystack = (const uint8_t *)"abc xyz";
-    size_t haystack_len = strlen((const char *)haystack);
-
-    rure *re = rure_compile_must("\\w+(\\w)");
-    rure_match match = {0};
-    rure_captures *caps = rure_captures_new(re);
-    rure_iter *it = rure_iter_new(re);
-
-    bool matched = rure_iter_next(it, haystack, haystack_len, &match);
-    if (!matched) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_iter] expected first match, but got no match\n");
-        }
-        passed = false;
-        goto done;
-    }
-    size_t expect_start = 0;
-    size_t expect_end = 3;
-    if (match.start != expect_start || match.end != expect_end) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_iter] expected first match at (%zu, %zu), but "
-                    "got match at (%zu, %zu)\n",
-                    expect_start, expect_end, match.start, match.end);
-        }
-        passed = false;
-        goto done;
-    }
-
-    matched = rure_iter_next_captures(it, haystack, haystack_len, caps);
-    if (!matched) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_iter] expected second match, but got no match\n");
-        }
-        passed = false;
-        goto done;
-    }
-    rure_captures_at(caps, 1, &match);
-    expect_start = 6;
-    expect_end = 7;
-    if (match.start != expect_start || match.end != expect_end) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_iter] expected second match at (%zu, %zu), but "
-                    "got match at (%zu, %zu)\n",
-                    expect_start, expect_end, match.start, match.end);
-        }
-        passed = false;
-        goto done;
-    }
-done:
-    rure_iter_free(it);
-    rure_captures_free(caps);
-    rure_free(re);
-    return passed;
-}
-
 bool test_iter_capture_name(char *expect, char *given) {
     bool passed = true;
     if (strcmp(expect, given)) {
@@ -316,35 +214,6 @@ bool test_compile_error() {
     return passed;
 }
 
-bool test_compile_error_size_limit() {
-    bool passed = true;
-    rure_options *opts = rure_options_new();
-    rure_options_size_limit(opts, 0);
-    rure_error *err = rure_error_new();
-    rure *re = rure_compile((const uint8_t *)"\\w{100}", 8, 0, opts, err);
-    if (re != NULL) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_compile_error_size_limit] "
-                    "expected NULL regex pointer, but got non-NULL pointer\n");
-        }
-        passed = false;
-        rure_free(re);
-    }
-    const char *msg = rure_error_message(err);
-    if (NULL == strstr(msg, "exceeds size")) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_compile_error] "
-                    "expected an 'exceeds size' error message, but "
-                    "got this instead: '%s'\n", msg);
-        }
-        passed = false;
-    }
-    rure_options_free(opts);
-    rure_error_free(err);
-    return passed;
-}
 
 bool test_regex_set_matches() {
 
@@ -495,41 +364,6 @@ done2:
 #undef PAT_COUNT
 }
 
-bool test_regex_set_options() {
-
-    bool passed = true;
-    rure_options *opts = rure_options_new();
-    rure_options_size_limit(opts, 0);
-    rure_error *err = rure_error_new();
-
-    const char *patterns[] = { "\\w{100}" };
-    const size_t patterns_lengths[] = { 8 };
-
-    rure_set *re = rure_compile_set(
-        (const uint8_t **) patterns, patterns_lengths, 1, 0, opts, err);
-    if (re != NULL) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_compile_error_size_limit] "
-                    "expected NULL regex pointer, but got non-NULL pointer\n");
-        }
-        passed = false;
-        rure_set_free(re);
-    }
-    const char *msg = rure_error_message(err);
-    if (NULL == strstr(msg, "exceeds size")) {
-        if (DEBUG) {
-            fprintf(stderr,
-                    "[test_compile_error] "
-                    "expected an 'exceeds size' error message, but "
-                    "got this instead: '%s'\n", msg);
-        }
-        passed = false;
-    }
-    rure_options_free(opts);
-    rure_error_free(err);
-    return passed;
-}
 
 bool test_escape() {
     bool passed = true;
@@ -673,17 +507,12 @@ int main() {
     bool passed = true;
 
     run_test(test_is_match, "test_is_match", &passed);
-    run_test(test_shortest_match, "test_shortest_match", &passed);
     run_test(test_find, "test_find", &passed);
     run_test(test_captures, "test_captures", &passed);
-    run_test(test_iter, "test_iter", &passed);
     run_test(test_iter_capture_names, "test_iter_capture_names", &passed);
     run_test(test_flags, "test_flags", &passed);
     run_test(test_compile_error, "test_compile_error", &passed);
-    run_test(test_compile_error_size_limit, "test_compile_error_size_limit",
-             &passed);
     run_test(test_regex_set_matches, "test_regex_set_match", &passed);
-    run_test(test_regex_set_options, "test_regex_set_options", &passed);
     run_test(test_regex_set_match_start, "test_regex_set_match_start",
              &passed);
     run_test(test_escape, "test_escape", &passed);
diff --git a/regex-capi/include/rure.h b/regex-capi/include/rure.h
index 286421e..5ba573f 100644
--- a/regex-capi/include/rure.h
+++ b/regex-capi/include/rure.h
@@ -262,41 +262,7 @@ bool rure_find(rure *re, const uint8_t *haystack, size_t length,
 bool rure_find_captures(rure *re, const uint8_t *haystack, size_t length,
                         size_t start, rure_captures *captures);
 
-/*
- * rure_shortest_match returns true if and only if re matches anywhere in
- * haystack. If a match is found, then its end location is stored in the
- * pointer given. The end location is the place at which the regex engine
- * determined that a match exists, but may occur before the end of the proper
- * leftmost-first match.
- *
- * haystack may contain arbitrary bytes, but ASCII compatible text is more
- * useful. UTF-8 is even more useful. Other text encodings aren't supported.
- * length should be the number of bytes in haystack.
- *
- * start is the position at which to start searching. Note that setting the
- * start position is distinct from incrementing the pointer, since the regex
- * engine may look at bytes before the start position to determine match
- * information. For example, if the start position is greater than 0, then the
- * \A ("begin text") anchor can never match.
- *
- * rure_shortest_match should be preferred to rure_find since it may be faster.
- *
- * N.B. The performance of this search is not impacted by the presence of
- * capturing groups in your regular expression.
- */
-bool rure_shortest_match(rure *re, const uint8_t *haystack, size_t length,
-                         size_t start, size_t *end);
 
-/*
- * rure_capture_name_index returns the capture index for the name given. If
- * no such named capturing group exists in re, then -1 is returned.
- *
- * The capture index may be used with rure_captures_at.
- *
- * This function never returns 0 since the first capture group always
- * corresponds to the entire match and is always unnamed.
- */
-int32_t rure_capture_name_index(rure *re, const char *name);
 
 /*
  * rure_iter_capture_names_new creates a new capture_names iterator.
@@ -320,15 +286,6 @@ void rure_iter_capture_names_free(rure_iter_capture_names *it);
  */
 bool rure_iter_capture_names_next(rure_iter_capture_names *it, char **name);
 
-/*
- * rure_iter_new creates a new iterator.
- *
- * An iterator will report all successive non-overlapping matches of re.
- * When calling iterator functions, the same haystack and length must be
- * supplied to all invocations. (Strict pointer equality is, however, not
- * required.)
- */
-rure_iter *rure_iter_new(rure *re);
 
 /*
  * rure_iter_free frees the iterator given.
@@ -337,52 +294,7 @@ rure_iter *rure_iter_new(rure *re);
  */
 void rure_iter_free(rure_iter *it);
 
-/*
- * rure_iter_next advances the iterator and returns true if and only if a
- * match was found. If a match is found, then the match pointer is set with the
- * start and end location of the match, in bytes.
- *
- * If no match is found, then subsequent calls will return false indefinitely.
- *
- * haystack may contain arbitrary bytes, but ASCII compatible text is more
- * useful. UTF-8 is even more useful. Other text encodings aren't supported.
- * length should be the number of bytes in haystack. The given haystack must
- * be logically equivalent to all other haystacks given to this iterator.
- *
- * rure_iter_next should be preferred to rure_iter_next_captures since it may
- * be faster.
- *
- * N.B. The performance of this search is not impacted by the presence of
- * capturing groups in your regular expression.
- */
-bool rure_iter_next(rure_iter *it, const uint8_t *haystack, size_t length,
-                    rure_match *match);
 
-/*
- * rure_iter_next_captures advances the iterator and returns true if and only if a
- * match was found. If a match is found, then all of its capture locations are
- * stored in the captures pointer given.
- *
- * If no match is found, then subsequent calls will return false indefinitely.
- *
- * haystack may contain arbitrary bytes, but ASCII compatible text is more
- * useful. UTF-8 is even more useful. Other text encodings aren't supported.
- * length should be the number of bytes in haystack. The given haystack must
- * be logically equivalent to all other haystacks given to this iterator.
- *
- * Only use this function if you specifically need access to capture locations.
- * It is not necessary to use this function just because your regular
- * expression contains capturing groups.
- *
- * Capture locations can be accessed using the rure_captures_* functions.
- *
- * N.B. The performance of this search can be impacted by the number of
- * capturing groups. If you're using this function, it may be beneficial to
- * use non-capturing groups (e.g., `(?:re)`) where possible.
- */
-bool rure_iter_next_captures(rure_iter *it,
-                             const uint8_t *haystack, size_t length,
-                             rure_captures *captures);
 
 /*
  * rure_captures_new allocates storage for all capturing groups in re.
@@ -424,48 +336,7 @@ bool rure_captures_at(rure_captures *captures, size_t i, rure_match *match);
  */
 size_t rure_captures_len(rure_captures *captures);
 
-/*
- * rure_options_new allocates space for options.
- *
- * Options may be freed immediately after a call to rure_compile, but otherwise
- * may be freely used in multiple calls to rure_compile.
- *
- * It is not safe to set options from multiple threads simultaneously. It is
- * safe to call rure_compile from multiple threads simultaneously using the
- * same options pointer.
- */
-rure_options *rure_options_new();
-
-/*
- * rure_options_free frees the given options.
- *
- * This must be called at most once.
- */
-void rure_options_free(rure_options *options);
 
-/*
- * rure_options_size_limit sets the appoximate size limit of the compiled
- * regular expression.
- *
- * This size limit roughly corresponds to the number of bytes occupied by a
- * single compiled program. If the program would exceed this number, then a
- * compilation error will be returned from rure_compile.
- */
-void rure_options_size_limit(rure_options *options, size_t limit);
-
-/*
- * rure_options_dfa_size_limit sets the approximate size of the cache used by
- * the DFA during search.
- *
- * This roughly corresponds to the number of bytes that the DFA will use while
- * searching.
- *
- * Note that this is a *per thread* limit. There is no way to set a global
- * limit. In particular, if a regular expression is used from multiple threads
- * simultaneously, then each thread may use up to the number of bytes
- * specified here.
- */
-void rure_options_dfa_size_limit(rure_options *options, size_t limit);
 
 /*
  * rure_compile_set compiles the given list of patterns into a single regular
diff --git a/regex-capi/src/lib.rs b/regex-capi/src/lib.rs
index 59f9681..d28bae5 100644
--- a/regex-capi/src/lib.rs
+++ b/regex-capi/src/lib.rs
@@ -1,6 +1,516 @@
-#[macro_use]
-mod error;
-mod rure;
+/******************************************************************************
+ * Copyright (c) USTC(Suzhou) & Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ * re2-rust licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: mengning<mengning@ustc.edu.cn>, liuzhitao<freekeeper@mail.ustc.edu.cn>, yangwentong<ywt0821@163.com>
+ * Create: 2022-11-25
+ * Description: Rure is a C API to Rust's regex library.
+ ******************************************************************************/
+ #[macro_use]
+ mod error;
+ pub use crate::error::*;
+
+ use std::ffi::{CStr, CString};
+ use std::ops::Deref;
+ use std::ptr;
+ use std::slice;
+ use std::str;
+ 
+ use libc::{c_char, size_t};
+ use regex::bytes::CaptureLocations;
+ use regex::{bytes, Regex};
+ 
+ use crate::error::{Error, ErrorKind};
+ use std::io;
+ use std::io::Write;
+ use std::process::abort;
+ 
+ include!("lib_internal.rs");
+ 
+ const RURE_FLAG_CASEI: u32 = 1 << 0; // Each RURE_FLAG_* occupies its own bit of a u32 flag word.
+ const RURE_FLAG_MULTI: u32 = 1 << 1;
+ const RURE_FLAG_DOTNL: u32 = 1 << 2;
+ const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
+ const RURE_FLAG_SPACE: u32 = 1 << 4;
+ const RURE_FLAG_UNICODE: u32 = 1 << 5; // NOTE(review): bits presumably mirror the RURE_FLAG_* macros in include/rure.h - confirm
+ const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE; // The flag word rure_compile_must passes to rure_compile.
+ 
+ pub struct RegexBytes { // Wrapper around `regex::bytes::Regex`; the handle type behind most rure_* entry points.
+     re: bytes::Regex,
+     // NOTE(review): the former `capture_names` name->index map is gone, together with rure_capture_name_index.
+ }
+ 
+ pub struct RegexUnicode { // Wrapper around `regex::Regex`; the handle type taken by rure_replace, rure_replace_all and rure_replace_count.
+     re: Regex,
+ }
+ 
+ pub struct Options { // Compile limits, passed by raw pointer to rure_compile and rure_compile_set.
+     size_limit: usize, // initial value comes from `Default for Options`
+     dfa_size_limit: usize, // initial value comes from `Default for Options`
+ }
+ 
+ // Set of patterns compiled together; this is the handle type returned by
+ // rure_compile_set. NOTE(review): an earlier comment here talked about using
+ // the `Exec` structure directly, but the field is a plain `bytes::RegexSet`.
+ pub struct RegexSet {
+     re: bytes::RegexSet,
+ }
+ 
+ #[repr(C)] // C-compatible layout: used as a `*mut rure_match` out-parameter by rure_find, rure_captures_at and rure_consume.
+ pub struct rure_match {
+     pub start: size_t, // start of the reported span
+     pub end: size_t, // end of the reported span
+ }
+ 
+ pub struct Captures(bytes::Locations); // Capture slots: allocated by rure_captures_new, filled by rure_find_captures, read by rure_captures_at/len.
+ 
+ pub struct IterCaptureNames { // State behind the rure_iter_capture_names_* functions.
+     capture_names: bytes::CaptureNames<'static>, // NOTE(review): built from a regex reached through a raw pointer, so `'static` is not enforced - presumably the regex must outlive this iterator
+     name_ptrs: Vec<*mut c_char>, // every CString handed out by ..._next; reclaimed in ..._free
+ }
+ 
+ #[repr(C)] // C-compatible layout; this is the element type that `MyVec::data` points at.
+ pub struct Atoms {
+     atom: *mut c_char, // presumably a NUL-terminated string produced by my_compile - confirm in lib_internal.rs
+ }
+ 
+ #[repr(C)] // C-compatible layout; returned by value from rure_filter_compile.
+ pub struct MyVec {
+     data: *mut Atoms, // NOTE(review): ownership and how C releases this buffer are not visible here
+     len: i32, // presumably the number of `Atoms` at `data` - confirm against my_compile
+ }
+ 
+ impl Deref for RegexBytes { // Lets a `&RegexBytes` call `bytes::Regex` methods directly, e.g. `re.locations()` in rure_captures_new.
+     type Target = bytes::Regex;
+     fn deref(&self) -> &bytes::Regex {
+         &self.re
+     }
+ }
+ 
+ impl Deref for RegexUnicode { // Lets a `&RegexUnicode` be used wherever `regex::Regex` methods are called.
+     type Target = Regex;
+     fn deref(&self) -> &Regex {
+         &self.re
+     }
+ }
+ 
+ impl Deref for RegexSet { // Lets a `&RegexSet` call `bytes::RegexSet` methods directly, e.g. `re.len()` in rure_set_len.
+     type Target = bytes::RegexSet;
+     fn deref(&self) -> &bytes::RegexSet {
+         &self.re
+     }
+ }
+ 
+ impl Default for Options { // Initial limits for a freshly created `Options`.
+     fn default() -> Options {
+         Options {
+             size_limit: 10 * (1 << 20), // 10 * 2^20 = 10_485_760
+             dfa_size_limit: 2 * (1 << 20), // 2 * 2^20 = 2_097_152
+         }
+     }
+ }
+ 
+ #[no_mangle] // C entry point: compile a NUL-terminated pattern with RURE_DEFAULT_FLAGS; aborts the process instead of reporting an error.
+ extern "C" fn rure_compile_must(pattern: *const c_char) -> *const RegexBytes {
+     let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() }; // byte length up to, not including, the NUL
+     let pat = pattern as *const u8;
+     let mut err = Error::new(ErrorKind::None);
+     let re = rure_compile(pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err); // null options; presumably means defaults - confirm in rure_compile_internal
+     if err.is_err() {
+         let _ = writeln!(&mut io::stderr(), "{}", err); // a failed write to stderr is ignored
+         let _ = writeln!(&mut io::stderr(), "aborting from rure_compile_must");
+         abort()
+     }
+     re
+ }
+ 
+ #[no_mangle] // C entry point: validate the pattern bytes as UTF-8, then delegate compilation to rure_compile_internal.
+ extern "C" fn rure_compile(
+     pattern: *const u8,
+     length: size_t,
+     flags: u32,
+     options: *const Options,
+     error: *mut Error,
+ ) -> *const RegexBytes {
+     let pat = unsafe { slice::from_raw_parts(pattern, length) }; // caller must pass `length` readable bytes
+     let pat = match str::from_utf8(pat) {
+         Ok(pat) => pat,
+         Err(err) => unsafe {
+             if !error.is_null() {
+                 *error = Error::new(ErrorKind::Str(err)); // assignment drops the previous value, so `error` must point at an initialized Error
+             }
+             return ptr::null(); // invalid UTF-8: NULL handle, error recorded when a slot was supplied
+         },
+     };
+     rure_compile_internal(pat, flags, options, error)
+ }
+ 
+ #[no_mangle] // C entry point: destroy a regex handle; `re` must be a live `*const RegexBytes` (presumably boxed by the *_internal constructors) and is invalid afterwards.
+ extern "C" fn rure_free(re: *const RegexBytes) {
+     unsafe {
+         drop(Box::from_raw(re as *mut RegexBytes)); // rebuild the Box with the handle's own type, not `regex::Regex`
+     }
+ }
+ 
+ #[no_mangle] // C entry point: report whether `re` matches in `haystack[..len]`, searching from byte offset `start`.
+ extern "C" fn rure_is_match(
+     re: *const RegexBytes,
+     haystack: *const u8,
+     len: size_t,
+     start: size_t,
+ ) -> bool {
+     let re = unsafe { &*re };
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+     re.is_match_at(haystack, start) // honor `start`, as rure_find and rure_set_is_match already do
+ }
+ 
+ #[no_mangle] // C entry point: rebuild the haystack slice and delegate the search to rure_find_internal.
+ extern "C" fn rure_find(
+     re: *const RegexBytes,
+     haystack: *const u8,
+     len: size_t,
+     start: size_t,
+     match_info: *mut rure_match,
+ ) -> bool {
+     let re = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+     rure_find_internal(re, haystack, start, match_info) // presumably fills `match_info` on success - see lib_internal.rs
+ }
+ 
+ #[no_mangle] // C entry point: search from `start` and record capture positions in `captures`; true when a match was found.
+ extern "C" fn rure_find_captures(
+     re: *const RegexBytes,
+     haystack: *const u8,
+     len: size_t,
+     start: size_t,
+     captures: *mut Captures,
+ ) -> bool {
+     let re = unsafe { &*re };
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+     let slots = unsafe { &mut (*captures).0 }; // the Locations created by rure_captures_new
+     re.read_captures_at(slots, haystack, start).is_some()
+ }
+ 
+ #[no_mangle] // C entry point: allocate a capture-name iterator for `re`; release it with rure_iter_capture_names_free.
+ extern "C" fn rure_iter_capture_names_new(re: *const RegexBytes) -> *mut IterCaptureNames {
+     let re = unsafe { &*re }; // reference with an unbounded lifetime, which is what lets the iterator be stored as `'static`
+     Box::into_raw(Box::new(IterCaptureNames {
+         capture_names: re.re.capture_names(),
+         name_ptrs: Vec::new(), // filled as names are handed out by ..._next
+     }))
+ }
+ 
+ #[no_mangle] // C entry point: free the iterator and every name string it handed out.
+ extern "C" fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
+     unsafe {
+         let it = &mut *it; // `it` must be non-null and come from rure_iter_capture_names_new
+         while let Some(ptr) = it.name_ptrs.pop() {
+             drop(CString::from_raw(ptr)); // reclaim a string created by `into_raw` in ..._next
+         }
+         drop(Box::from_raw(it)); // finally release the iterator allocation itself
+     }
+ }
+ 
+ #[no_mangle]
+ extern "C" fn rure_iter_capture_names_next(
+     it: *mut IterCaptureNames,
+     capture_name: *mut *mut c_char,
+ ) -> bool {
+     // With nowhere to store the result, bail out before advancing the iterator.
+     if capture_name.is_null() {
+         return false;
+     }
+     let state = unsafe { &mut *it };
+     // The outer Option signals exhaustion; the inner one is None for an
+     // unnamed group, which is reported to C as the empty string.
+     let group_name = match state.capture_names.next() {
+         Some(maybe_name) => maybe_name.unwrap_or(""),
+         None => return false,
+     };
+     // A name containing an interior NUL cannot become a C string; the group
+     // has already been consumed from the iterator when we give up here.
+     let owned = match CString::new(group_name.as_bytes()) {
+         Ok(c_name) => c_name,
+         Err(_) => return false,
+     };
+     // Hand the string to C as a raw pointer and remember it in name_ptrs so
+     // rure_iter_capture_names_free can reclaim it later.
+     let raw = owned.into_raw();
+     state.name_ptrs.push(raw);
+     unsafe {
+         *capture_name = raw;
+     }
+     true
+ }
+ 
+ #[no_mangle] // C entry point: allocate capture storage for `re`; release it with rure_captures_free.
+ extern "C" fn rure_captures_new(re: *const RegexBytes) -> *mut Captures {
+     let re = unsafe { &*re };
+     let captures = Captures(re.locations()); // `locations()` is reached on bytes::Regex through Deref
+     Box::into_raw(Box::new(captures))
+ }
+ 
+ #[no_mangle] // C entry point: free storage obtained from rure_captures_new; the pointer is invalid afterwards.
+ extern "C" fn rure_captures_free(captures: *const Captures) {
+     unsafe {
+         drop(Box::from_raw(captures as *mut Captures)); // rebuild the Box so it is dropped normally
+     }
+ }
+ 
+ #[no_mangle] // C entry point: look up capture group `i`; the lookup itself lives in rure_captures_at_internal.
+ extern "C" fn rure_captures_at(
+     captures: *const Captures,
+     i: size_t,
+     match_info: *mut rure_match,
+ ) -> bool {
+     let locs = unsafe { &(*captures).0 }; // borrow the wrapped Locations
+     rure_captures_at_internal(locs, i, match_info) // presumably fills `match_info` when group `i` took part in the match - confirm
+ }
+ 
+ #[no_mangle] // C entry point: return `len()` of the wrapped Locations.
+ extern "C" fn rure_captures_len(captures: *const Captures) -> size_t {
+     unsafe { (*captures).0.len() } // `captures` must be a live, non-null pointer
+ }
+ 
+ #[no_mangle] // C entry point: view the two parallel C arrays as slices and delegate to rure_compile_set_internal.
+ extern "C" fn rure_compile_set(
+     patterns: *const *const u8,
+     patterns_lengths: *const size_t,
+     patterns_count: size_t,
+     flags: u32,
+     options: *const Options,
+     error: *mut Error,
+ ) -> *const RegexSet {
+     let (raw_pats, raw_patsl) = unsafe {
+         (
+             slice::from_raw_parts(patterns, patterns_count), // one pointer per pattern
+             slice::from_raw_parts(patterns_lengths, patterns_count), // the matching byte lengths
+         )
+     };
+     rure_compile_set_internal(raw_pats, raw_patsl, patterns_count, flags, options, error)
+ }
+ 
+ #[no_mangle] // C entry point: free a set obtained from rure_compile_set; the pointer is invalid afterwards.
+ extern "C" fn rure_set_free(re: *const RegexSet) {
+     unsafe {
+         drop(Box::from_raw(re as *mut RegexSet)); // rebuild the Box so it is dropped normally
+     }
+ }
+ 
+ #[no_mangle] // C entry point: report whether the set matches `haystack[..len]`, searching from `start`.
+ extern "C" fn rure_set_is_match(
+     re: *const RegexSet,
+     haystack: *const u8,
+     len: size_t,
+     start: size_t,
+ ) -> bool {
+     let re = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+     re.is_match_at(haystack, start) // reached on bytes::RegexSet through Deref
+ }
+ 
+ #[no_mangle] // C entry point: per-pattern match flags go into `matches`; the work is done by rure_set_matches_internal.
+ extern "C" fn rure_set_matches(
+     re: *const RegexSet,
+     haystack: *const u8,
+     len: size_t,
+     start: size_t,
+     matches: *mut bool,
+ ) -> bool {
+     let re = unsafe { &*re };
+     let matches = unsafe { slice::from_raw_parts_mut(matches, re.len()) }; // caller must supply room for `re.len()` bools (see rure_set_len)
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+ 
+     rure_set_matches_internal(re, matches, haystack, start)
+ }
+ 
+ #[no_mangle] // C entry point: return `len()` of the wrapped RegexSet; rure_set_matches sizes its output buffer with the same value.
+ extern "C" fn rure_set_len(re: *const RegexSet) -> size_t {
+     unsafe { (*re).len() } // `re` must be a live, non-null handle
+ }
+ 
+ #[no_mangle] // C entry point: escape a NUL-terminated pattern; aborts the process instead of reporting an error.
+ extern "C" fn rure_escape_must(pattern: *const c_char) -> *const c_char {
+     let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() }; // byte length up to, not including, the NUL
+     let pat = pattern as *const u8;
+     let mut err = Error::new(ErrorKind::None);
+     let esc = rure_escape(pat, len, &mut err);
+     if err.is_err() {
+         // Diagnostics go to stderr only, as in rure_compile_must; stdout is left to the host program.
+         let _ = writeln!(&mut io::stderr(), "{}", err);
+         let _ = writeln!(&mut io::stderr(), "aborting from rure_escape_must");
+         abort()
+     }
+     esc // created by CString::into_raw in rure_escape; release with rure_cstring_free
+ }
+ 
+ /// A helper function that implements fallible escaping in a way that returns
+ /// an error if escaping failed.
+ ///
+ /// This should ideally be exposed, but it needs API design work. In
+ /// particular, this should not return a C string, but a `const uint8_t *`
+ /// instead, since it may contain a NUL byte.
+ fn rure_escape(pattern: *const u8, length: size_t, error: *mut Error) -> *const c_char {
+     let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
+     let str_pat = match str::from_utf8(pat) {
+         Ok(val) => val,
+         Err(err) => unsafe {
+             if !error.is_null() {
+                 *error = Error::new(ErrorKind::Str(err)); // invalid UTF-8 in the input pattern
+             }
+             return ptr::null();
+         },
+     };
+     let esc_pat = regex::escape(str_pat);
+     let c_esc_pat = match CString::new(esc_pat) {
+         Ok(val) => val,
+         Err(err) => unsafe {
+             if !error.is_null() {
+                 *error = Error::new(ErrorKind::Nul(err)); // escaped text has an interior NUL, so it cannot be a C string
+             }
+             return ptr::null();
+         },
+     };
+     c_esc_pat.into_raw() as *const c_char // ownership passes to the caller; rure_cstring_free takes it back
+ }
+ 
+ #[no_mangle] // C entry point: free a string whose pointer was produced by `CString::into_raw` (e.g. by rure_escape_must).
+ extern "C" fn rure_cstring_free(s: *mut c_char) {
+     unsafe {
+         drop(CString::from_raw(s)); // retake ownership so the allocation is released
+     }
+ }
+ 
+ #[no_mangle] // C entry point: rebuild both byte slices and delegate to rure_replace_internal.
+ extern "C" fn rure_replace(
+     re: *const RegexUnicode,
+     haystack: *const u8,
+     len_h: size_t,
+     rewrite: *const u8,
+     len_r: size_t,
+ ) -> *const u8 {
+     let re = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(haystack, len_h) };
+     let rewrite = unsafe { slice::from_raw_parts(rewrite, len_r) };
+     rure_replace_internal(re, haystack, rewrite) // NOTE(review): ownership and length of the returned buffer are defined in lib_internal.rs - confirm
+ }
+ 
+ #[no_mangle] // C entry point: same marshalling as rure_replace, but delegates to rure_replace_all_internal.
+ extern "C" fn rure_replace_all(
+     re: *const RegexUnicode,
+     haystack: *const u8,
+     len_h: size_t,
+     rewrite: *const u8,
+     len_r: size_t,
+ ) -> *const u8 {
+     let re = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(haystack, len_h) };
+     let rewrite = unsafe { slice::from_raw_parts(rewrite, len_r) };
+     rure_replace_all_internal(re, haystack, rewrite) // NOTE(review): ownership and length of the returned buffer are defined in lib_internal.rs - confirm
+ }
+ 
+ /*
+  *  Simple way to use regex
+  */
+ 
+ #[no_mangle] // C entry point: build a regex handle from `length` pattern bytes; construction is done by rure_new_internal.
+ extern "C" fn rure_new(pattern: *const u8, length: size_t) -> *const RegexBytes {
+     let pat = unsafe { slice::from_raw_parts(pattern, length) }; // caller must pass `length` readable bytes
+     rure_new_internal(pat) // NOTE(review): failure behaviour (NULL vs. abort) is decided in lib_internal.rs - confirm
+ }
+ 
+ #[no_mangle] // C entry point: rebuild the haystack slice and delegate to rure_consume_internal.
+ extern "C" fn rure_consume(
+     re: *const RegexBytes,
+     haystack: *const u8,
+     len: size_t,
+     match_info: *mut rure_match,
+ ) -> bool {
+     let exp = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+     rure_consume_internal(exp, haystack, match_info) // presumably fills `match_info` on success - see lib_internal.rs
+ }
+ 
+ #[no_mangle] // C entry point: pass the bytes of a NUL-terminated rewrite string to rure_max_submatch_internal.
+ extern "C" fn rure_max_submatch(rewrite: *const c_char) -> i32 {
+     let len = unsafe { CStr::from_ptr(rewrite).to_bytes().len() }; // byte length up to, not including, the NUL
+     let pat = rewrite as *const u8;
+     let text = unsafe { slice::from_raw_parts(pat, len) };
+ 
+     rure_max_submatch_internal(text)
+ }
+ 
+ #[no_mangle] // C entry point: pass a NUL-terminated rewrite string and `cap_num` to rure_check_rewrite_string_internal.
+ extern "C" fn rure_check_rewrite_string(rewrite: *const c_char, cap_num: i32) -> bool {
+     let len = unsafe { CStr::from_ptr(rewrite).to_bytes().len() }; // byte length up to, not including, the NUL
+     let pat = rewrite as *const u8;
+     let text = unsafe { slice::from_raw_parts(pat, len) };
+ 
+     rure_check_rewrite_string_internal(text, cap_num)
+ }
+ 
+ #[no_mangle] // C entry point: hand `length` bytes of a rewrite string to rure_rewrite_str_convert_internal.
+ extern "C" fn rure_rewrite_str_convert(rewrite: *const u8, length: size_t) -> *const c_char {
+     let rewrite = unsafe { slice::from_raw_parts(rewrite, length) }; // caller must pass `length` readable bytes
+ 
+     rure_rewrite_str_convert_internal(rewrite) // NOTE(review): who frees the returned C string is not visible here - confirm
+ }
+ 
+ #[no_mangle] // C entry point: marshal a rewrite template plus `vecs_count` substrings for rure_rewrite_internal; NULL if any input is not UTF-8.
+ extern "C" fn rure_rewrite(
+     rewrite: *const u8,
+     length: size_t,
+     vecs: *const *const u8,
+     vecs_lengths: *const size_t,
+     vecs_count: size_t,
+ ) -> *const c_char {
+     // View the rewrite template as UTF-8; return NULL rather than panic across the FFI boundary.
+     let rewrite = unsafe { slice::from_raw_parts(rewrite, length) };
+     let rewrite_str = match str::from_utf8(rewrite) { Ok(s) => s, Err(_) => return ptr::null() };
+ 
+     // Pair each pointer in `vecs` with its byte length from `vecs_lengths`.
+     let (raw_vecs, raw_vecsl) = unsafe {
+         (
+             slice::from_raw_parts(vecs, vecs_count),
+             slice::from_raw_parts(vecs_lengths, vecs_count),
+         )
+     };
+ 
+     let mut rure_vecs = Vec::with_capacity(vecs_count);
+     for (&raw_vec, &raw_vecl) in raw_vecs.iter().zip(raw_vecsl) {
+         let rure_vec = unsafe { slice::from_raw_parts(raw_vec, raw_vecl) };
+         match str::from_utf8(rure_vec) { Ok(s) => rure_vecs.push(s), Err(_) => return ptr::null() }
+     }
+ 
+     rure_rewrite_internal(rewrite_str, vecs_count, rure_vecs)
+ }
+ 
+ #[no_mangle] // C entry point: pass the bytes of a NUL-terminated haystack to rure_replace_count_internal.
+ extern "C" fn rure_replace_count(re: *const RegexUnicode, haystack: *const c_char) -> size_t {
+     let len = unsafe { CStr::from_ptr(haystack).to_bytes().len() }; // byte length up to, not including, the NUL
+     let hay = haystack as *const u8;
+ 
+     let re = unsafe { &*re }; // `re` must be a live, non-null handle
+     let haystack = unsafe { slice::from_raw_parts(hay, len) };
+     rure_replace_count_internal(haystack, re) // note the argument order: haystack first, regex second
+ }
+ 
+ #[no_mangle] // C entry point: decode the pattern bytes as UTF-8 and return whatever my_compile produces for it.
+ extern "C" fn rure_filter_compile(
+     regex_str: *const u8,
+     regex_len: size_t,
+     min_atoms_len: size_t,
+ ) -> MyVec {
+     let r = unsafe { slice::from_raw_parts(regex_str, regex_len) };
+     let regex_str = str::from_utf8(r).unwrap(); // NOTE(review): panics inside an extern "C" fn on invalid UTF-8; needs an agreed empty-MyVec contract to fix
+     let atoms = my_compile(regex_str, min_atoms_len as i32); // NOTE(review): `as i32` silently truncates values above i32::MAX
+     atoms
+ }
+ 
 
-pub use crate::error::*;
-pub use crate::rure::*;
diff --git a/regex-capi/src/rure.rs b/regex-capi/src/lib_internal.rs
similarity index 51%
rename from regex-capi/src/rure.rs
rename to regex-capi/src/lib_internal.rs
index 84f93e9..cec5ec6 100644
--- a/regex-capi/src/rure.rs
+++ b/regex-capi/src/lib_internal.rs
@@ -9,142 +9,15 @@
  * PURPOSE.
  * See the Mulan PSL v2 for more details.
  * Author: mengning<mengning@ustc.edu.cn>, liuzhitao<freekeeper@mail.ustc.edu.cn>, yangwentong<ywt0821@163.com>
- * Create: 2022-06-21
- * Description: Rure is a C API to Rust's regex library.
+ * Create: 2022-11-25
+ * Description: Business-logic implementation layer, written in pure Rust.
  ******************************************************************************/
-use std::collections::HashMap;
-use std::ffi::{CStr, CString};
-use std::ops::Deref;
-use std::ptr;
-use std::slice;
-use std::str;
-
-use libc::{c_char, size_t};
-use regex::{bytes, Regex};
-
-use crate::error::{Error, ErrorKind};
-
-const RURE_FLAG_CASEI: u32 = 1 << 0;
-const RURE_FLAG_MULTI: u32 = 1 << 1;
-const RURE_FLAG_DOTNL: u32 = 1 << 2;
-const RURE_FLAG_SWAP_GREED: u32 = 1 << 3;
-const RURE_FLAG_SPACE: u32 = 1 << 4;
-const RURE_FLAG_UNICODE: u32 = 1 << 5;
-const RURE_DEFAULT_FLAGS: u32 = RURE_FLAG_UNICODE;
-
-pub struct RegexBytes {
-    re: bytes::Regex,
-    capture_names: HashMap<String, i32>,
-}
-
-pub struct RegexUnicode {
-    re: Regex,
-}
-
-pub struct Options {
-    size_limit: usize,
-    dfa_size_limit: usize,
-}
-
-// The `RegexSet` is not exposed with option support or matching at an
-// arbitrary position with a crate just yet. To circumvent this, we use
-// the `Exec` structure directly.
-pub struct RegexSet {
-    re: bytes::RegexSet,
-}
-
-#[repr(C)]
-pub struct rure_match {
-    pub start: size_t,
-    pub end: size_t,
-}
-
-pub struct Captures(bytes::Locations);
-
-pub struct Iter {
-    re: *const RegexBytes,
-    last_end: usize,
-    last_match: Option<usize>,
-}
-
-pub struct IterCaptureNames {
-    capture_names: bytes::CaptureNames<'static>,
-    name_ptrs: Vec<*mut c_char>,
-}
-
-#[repr(C)]
-pub struct Atoms {
-    atom: *mut c_char,
-}
-
-#[repr(C)]
-pub struct MyVec {
-    data: *mut Atoms,
-    len: i32,
-}
-
-impl Deref for RegexBytes {
-    type Target = bytes::Regex;
-    fn deref(&self) -> &bytes::Regex {
-        &self.re
-    }
-}
-
-impl Deref for RegexUnicode {
-    type Target = Regex;
-    fn deref(&self) -> &Regex {
-        &self.re
-    }
-}
-
-impl Deref for RegexSet {
-    type Target = bytes::RegexSet;
-    fn deref(&self) -> &bytes::RegexSet {
-        &self.re
-    }
-}
-
-impl Default for Options {
-    fn default() -> Options {
-        Options {
-            size_limit: 10 * (1 << 20),
-            dfa_size_limit: 2 * (1 << 20),
-        }
-    }
-}
-
-#[no_mangle]
-extern "C" fn rure_compile_must(pattern: *const c_char) -> *const RegexBytes {
-    let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
-    let pat = pattern as *const u8;
-    let mut err = Error::new(ErrorKind::None);
-    let re = rure_compile(pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
-    // if err.is_err() {
-    //     let _ = writeln!(&mut io::stderr(), "{}", err);
-    //     let _ = writeln!(&mut io::stderr(), "aborting from rure_compile_must");
-    //     unsafe { abort() }
-    // }
-    re
-}
-
-#[no_mangle]
-extern "C" fn rure_compile(
-    pattern: *const u8,
-    length: size_t,
+fn rure_compile_internal(
+    pat: &str,
     flags: u32,
     options: *const Options,
     error: *mut Error,
 ) -> *const RegexBytes {
-    let pat = unsafe { slice::from_raw_parts(pattern, length) };
-    let pat = match str::from_utf8(pat) {
-        Ok(pat) => pat,
-        Err(err) => unsafe {
-            if !error.is_null() {
-                *error = Error::new(ErrorKind::Str(err));
-            }
-            return ptr::null();
-        },
-    };
     let mut builder = bytes::RegexBuilder::new(pat);
     if !options.is_null() {
         let options = unsafe { &*options };
@@ -159,13 +32,15 @@ extern "C" fn rure_compile(
     builder.unicode(flags & RURE_FLAG_UNICODE > 0);
     match builder.build() {
         Ok(re) => {
-            let mut capture_names = HashMap::new();
-            for (i, name) in re.capture_names().enumerate() {
-                if let Some(name) = name {
-                    capture_names.insert(name.to_owned(), i as i32);
-                }
-            }
-            let re = RegexBytes { re, capture_names };
+            // let mut capture_names = HashMap::new();
+            // for (i, name) in re.capture_names().enumerate() {
+            //     if let Some(name) = name {
+            //         capture_names.insert(name.to_owned(), i as i32);
+            //     }
+            // }
+            // let re = RegexBytes { re, capture_names };
+            let re = RegexBytes { re };
+
             Box::into_raw(Box::new(re))
         }
         Err(err) => unsafe {
@@ -177,33 +52,12 @@ extern "C" fn rure_compile(
     }
 }
 
-#[no_mangle]
-extern "C" fn rure_free(re: *const RegexBytes) {
-    unsafe { drop(Box::from_raw(re as *mut Regex)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_is_match(
-    re: *const RegexBytes,
-    haystack: *const u8,
-    len: size_t,
-    _start: size_t,
-) -> bool {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
-    re.is_match(haystack)
-}
-
-#[no_mangle]
-extern "C" fn rure_find(
-    re: *const RegexBytes,
-    haystack: *const u8,
-    len: size_t,
+fn rure_find_internal(
+    re: &RegexBytes,
+    haystack: &[u8],
     start: size_t,
     match_info: *mut rure_match,
 ) -> bool {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
     re.find_at(haystack, start)
         .map(|m| unsafe {
             if !match_info.is_null() {
@@ -214,214 +68,11 @@ extern "C" fn rure_find(
         .is_some()
 }
 
-#[no_mangle]
-extern "C" fn rure_find_captures(
-    re: *const RegexBytes,
-    haystack: *const u8,
-    len: size_t,
-    start: size_t,
-    captures: *mut Captures,
-) -> bool {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
-    let slots = unsafe { &mut (*captures).0 };
-    re.read_captures_at(slots, haystack, start).is_some()
-}
-
-#[no_mangle]
-extern "C" fn rure_shortest_match(
-    re: *const RegexBytes,
-    haystack: *const u8,
-    len: size_t,
-    start: size_t,
-    end: *mut usize,
-) -> bool {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
-    match re.shortest_match_at(haystack, start) {
-        None => false,
-        Some(i) => {
-            if !end.is_null() {
-                unsafe {
-                    *end = i;
-                }
-            }
-            true
-        }
-    }
-}
-
-#[no_mangle]
-extern "C" fn rure_capture_name_index(re: *const RegexBytes, name: *const c_char) -> i32 {
-    let re = unsafe { &*re };
-    let name = unsafe { CStr::from_ptr(name) };
-    let name = match name.to_str() {
-        Err(_) => return -1,
-        Ok(name) => name,
-    };
-    re.capture_names.get(name).copied().unwrap_or(-1)
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_capture_names_new(re: *const RegexBytes) -> *mut IterCaptureNames {
-    let re = unsafe { &*re };
-    Box::into_raw(Box::new(IterCaptureNames {
-        capture_names: re.re.capture_names(),
-        name_ptrs: Vec::new(),
-    }))
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_capture_names_free(it: *mut IterCaptureNames) {
-    unsafe {
-        let it = &mut *it;
-        while let Some(ptr) = it.name_ptrs.pop() {
-            drop(CString::from_raw(ptr));
-        }
-        drop(Box::from_raw(it));
-    }
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_capture_names_next(
-    it: *mut IterCaptureNames,
-    capture_name: *mut *mut c_char,
-) -> bool {
-    if capture_name.is_null() {
-        return false;
-    }
-    let it = unsafe { &mut *it };
-    let cn = match it.capture_names.next() {
-        // Top-level iterator ran out of capture groups
-        None => return false,
-        Some(val) => {
-            match val {
-                // inner Option didn't have a name
-                None => "",
-                Some(name) => name,
-            }
-        }
-    };
-    unsafe {
-        let cs = match CString::new(cn.as_bytes()) {
-            Result::Ok(val) => val,
-            Result::Err(_) => return false,
-        };
-        let ptr = cs.into_raw();
-        it.name_ptrs.push(ptr);
-        *capture_name = ptr;
-    }
-    true
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_new(re: *const RegexBytes) -> *mut Iter {
-    Box::into_raw(Box::new(Iter {
-        re,
-        last_end: 0,
-        last_match: None,
-    }))
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_free(it: *mut Iter) {
-    unsafe { drop(Box::from_raw(it)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_next(
-    it: *mut Iter,
-    haystack: *const u8,
-    len: size_t,
-    match_info: *mut rure_match,
-) -> bool {
-    let it = unsafe { &mut *it };
-    let re = unsafe { &*it.re };
-    let text = unsafe { slice::from_raw_parts(haystack, len) };
-    if it.last_end > text.len() {
-        return false;
-    }
-    let (s, e) = match re.find_at(text, it.last_end) {
-        None => return false,
-        Some(m) => (m.start(), m.end()),
-    };
-    if s == e {
-        // This is an empty match. To ensure we make progress, start
-        // the next search at the smallest possible starting position
-        // of the next match following this one.
-        it.last_end += 1;
-        // Don't accept empty matches immediately following a match.
-        // Just move on to the next match.
-        if Some(e) == it.last_match {
-            return rure_iter_next(it, haystack, len, match_info);
-        }
-    } else {
-        it.last_end = e;
-    }
-    it.last_match = Some(e);
-    if !match_info.is_null() {
-        unsafe {
-            (*match_info).start = s;
-            (*match_info).end = e;
-        }
-    }
-    true
-}
-
-#[no_mangle]
-extern "C" fn rure_iter_next_captures(
-    it: *mut Iter,
-    haystack: *const u8,
-    len: size_t,
-    captures: *mut Captures,
-) -> bool {
-    let it = unsafe { &mut *it };
-    let re = unsafe { &*it.re };
-    let slots = unsafe { &mut (*captures).0 };
-    let text = unsafe { slice::from_raw_parts(haystack, len) };
-    if it.last_end > text.len() {
-        return false;
-    }
-    let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
-        None => return false,
-        Some(m) => (m.start(), m.end()),
-    };
-    if s == e {
-        // This is an empty match. To ensure we make progress, start
-        // the next search at the smallest possible starting position
-        // of the next match following this one.
-        it.last_end += 1;
-        // Don't accept empty matches immediately following a match.
-        // Just move on to the next match.
-        if Some(e) == it.last_match {
-            return rure_iter_next_captures(it, haystack, len, captures);
-        }
-    } else {
-        it.last_end = e;
-    }
-    it.last_match = Some(e);
-    true
-}
-
-#[no_mangle]
-extern "C" fn rure_captures_new(re: *const RegexBytes) -> *mut Captures {
-    let re = unsafe { &*re };
-    let captures = Captures(re.locations());
-    Box::into_raw(Box::new(captures))
-}
-
-#[no_mangle]
-extern "C" fn rure_captures_free(captures: *const Captures) {
-    unsafe { drop(Box::from_raw(captures as *mut Captures)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_captures_at(
-    captures: *const Captures,
+fn rure_captures_at_internal(
+    locs: &CaptureLocations,
     i: size_t,
     match_info: *mut rure_match,
 ) -> bool {
-    let locs = unsafe { &(*captures).0 };
     match locs.pos(i) {
         Some((start, end)) => {
             if !match_info.is_null() {
@@ -436,49 +87,14 @@ extern "C" fn rure_captures_at(
     }
 }
 
-#[no_mangle]
-extern "C" fn rure_captures_len(captures: *const Captures) -> size_t {
-    unsafe { (*captures).0.len() }
-}
-
-#[no_mangle]
-extern "C" fn rure_options_new() -> *mut Options {
-    Box::into_raw(Box::new(Options::default()))
-}
-
-#[no_mangle]
-extern "C" fn rure_options_free(options: *mut Options) {
-    unsafe { drop(Box::from_raw(options)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_options_size_limit(options: *mut Options, limit: size_t) {
-    let options = unsafe { &mut *options };
-    options.size_limit = limit;
-}
-
-#[no_mangle]
-extern "C" fn rure_options_dfa_size_limit(options: *mut Options, limit: size_t) {
-    let options = unsafe { &mut *options };
-    options.dfa_size_limit = limit;
-}
-
-#[no_mangle]
-extern "C" fn rure_compile_set(
-    patterns: *const *const u8,
-    patterns_lengths: *const size_t,
+fn rure_compile_set_internal(
+    raw_pats: &[*const u8],
+    raw_patsl: &[usize],
     patterns_count: size_t,
     flags: u32,
     options: *const Options,
     error: *mut Error,
 ) -> *const RegexSet {
-    let (raw_pats, raw_patsl) = unsafe {
-        (
-            slice::from_raw_parts(patterns, patterns_count),
-            slice::from_raw_parts(patterns_lengths, patterns_count),
-        )
-    };
-
     let mut pats = Vec::with_capacity(patterns_count);
     for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) {
         let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) };
@@ -516,35 +132,12 @@ extern "C" fn rure_compile_set(
     }
 }
 
-#[no_mangle]
-extern "C" fn rure_set_free(re: *const RegexSet) {
-    unsafe { drop(Box::from_raw(re as *mut RegexSet)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_set_is_match(
-    re: *const RegexSet,
-    haystack: *const u8,
-    len: size_t,
-    start: size_t,
-) -> bool {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
-    re.is_match_at(haystack, start)
-}
-
-#[no_mangle]
-extern "C" fn rure_set_matches(
-    re: *const RegexSet,
-    haystack: *const u8,
-    len: size_t,
+fn rure_set_matches_internal(
+    re: &RegexSet,
+    matches: &mut [bool],
+    haystack: &[u8],
     start: size_t,
-    matches: *mut bool,
 ) -> bool {
-    let re = unsafe { &*re };
-    let matches = unsafe { slice::from_raw_parts_mut(matches, re.len()) };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
-
     // read_matches_at isn't guaranteed to set non-matches to false
     for item in matches.iter_mut() {
         *item = false;
@@ -552,74 +145,7 @@ extern "C" fn rure_set_matches(
     re.read_matches_at(matches, haystack, start)
 }
 
-#[no_mangle]
-extern "C" fn rure_set_len(re: *const RegexSet) -> size_t {
-    unsafe { (*re).len() }
-}
-
-#[no_mangle]
-extern "C" fn rure_escape_must(pattern: *const c_char) -> *const c_char {
-        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
-        let pat = pattern as *const u8;
-        let mut err = Error::new(ErrorKind::None);
-        let esc = rure_escape(pat, len, &mut err);
-        if err.is_err() {
-            println!("{}", "aborting from rure_escape_must");
-            // let _ = writeln!(&mut io::stderr(), "{}", err);
-            // let _ = writeln!(
-            //     &mut io::stderr(), "aborting from rure_escape_must");
-            // unsafe { abort() }
-        }
-        esc
-    }
-
-
-/// A helper function that implements fallible escaping in a way that returns
-/// an error if escaping failed.
-///
-/// This should ideally be exposed, but it needs API design work. In
-/// particular, this should not return a C string, but a `const uint8_t *`
-/// instead, since it may contain a NUL byte.
-fn rure_escape(pattern: *const u8, length: size_t, error: *mut Error) -> *const c_char {
-    let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
-    let str_pat = match str::from_utf8(pat) {
-        Ok(val) => val,
-        Err(err) => unsafe {
-            if !error.is_null() {
-                *error = Error::new(ErrorKind::Str(err));
-            }
-            return ptr::null();
-        },
-    };
-    let esc_pat = regex::escape(str_pat);
-    let c_esc_pat = match CString::new(esc_pat) {
-        Ok(val) => val,
-        Err(err) => unsafe {
-            if !error.is_null() {
-                *error = Error::new(ErrorKind::Nul(err));
-            }
-            return ptr::null();
-        },
-    };
-    c_esc_pat.into_raw() as *const c_char
-}
-
-#[no_mangle]
-extern "C" fn rure_cstring_free(s: *mut c_char) {
-    unsafe { drop(CString::from_raw(s)); }
-}
-
-#[no_mangle]
-extern "C" fn rure_replace(
-    re: *const RegexUnicode,
-    haystack: *const u8,
-    len_h: size_t,
-    rewrite: *const u8,
-    len_r: size_t,
-) -> *const u8 {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len_h) };
-    let rewrite = unsafe { slice::from_raw_parts(rewrite, len_r) };
+fn rure_replace_internal(re: &RegexUnicode, haystack: &[u8], rewrite: &[u8]) -> *const u8 {
     let haystack = match str::from_utf8(haystack) {
         Ok(haystack) => haystack,
         Err(_err) => {
@@ -643,17 +169,7 @@ extern "C" fn rure_replace(
     c_esc_pat.into_raw() as *const u8
 }
 
-#[no_mangle]
-extern "C" fn rure_replace_all(
-    re: *const RegexUnicode,
-    haystack: *const u8,
-    len_h: size_t,
-    rewrite: *const u8,
-    len_r: size_t,
-) -> *const u8 {
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len_h) };
-    let rewrite = unsafe { slice::from_raw_parts(rewrite, len_r) };
+fn rure_replace_all_internal(re: &RegexUnicode, haystack: &[u8], rewrite: &[u8]) -> *const u8 {
     let haystack = match str::from_utf8(haystack) {
         Ok(haystack) => haystack,
         Err(_err) => {
@@ -677,13 +193,7 @@ extern "C" fn rure_replace_all(
     c_esc_pat.into_raw() as *const u8
 }
 
-/*
- *  Simple way to use regex
- */
-
-#[no_mangle]
-extern "C" fn rure_new(pattern: *const u8, length: size_t) -> *const RegexBytes {
-    let pat = unsafe { slice::from_raw_parts(pattern, length) };
+fn rure_new_internal(pat: &[u8]) -> *const RegexBytes {
     let pat = match str::from_utf8(pat) {
         Ok(pat) => pat,
         Err(_err) => {
@@ -697,15 +207,7 @@ extern "C" fn rure_new(pattern: *const u8, length: size_t) -> *const RegexBytes
     exp as *const RegexBytes
 }
 
-#[no_mangle]
-extern "C" fn rure_consume(
-    re: *const RegexBytes,
-    haystack: *const u8,
-    len: size_t,
-    match_info: *mut rure_match,
-) -> bool {
-    let exp = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(haystack, len) };
+fn rure_consume_internal(exp: &RegexBytes, haystack: &[u8], match_info: *mut rure_match) -> bool {
     exp.find(haystack)
         .map(|m| unsafe {
             if !match_info.is_null() {
@@ -716,14 +218,10 @@ extern "C" fn rure_consume(
         .is_some()
 }
 
-#[no_mangle]
-extern "C" fn rure_max_submatch(rewrite: *const c_char) -> i32 {
+fn rure_max_submatch_internal(text: &[u8]) -> i32 {
     let mut max: i32 = 0;
     let mut flag = 0;
     let zero_number = '0' as i32;
-    let len = unsafe { CStr::from_ptr(rewrite).to_bytes().len() };
-    let pat = rewrite as *const u8;
-    let text = unsafe { slice::from_raw_parts(pat, len) };
     let rewrite = std::str::from_utf8(text).unwrap();
     for s in rewrite.chars() {
         if s == '\\' {
@@ -741,11 +239,7 @@ extern "C" fn rure_max_submatch(rewrite: *const c_char) -> i32 {
     max
 }
 
-#[no_mangle]
-extern "C" fn rure_check_rewrite_string(rewrite: *const c_char, cap_num: i32) -> bool {
-    let len = unsafe { CStr::from_ptr(rewrite).to_bytes().len() };
-    let pat = rewrite as *const u8;
-    let text = unsafe { slice::from_raw_parts(pat, len) };
+fn rure_check_rewrite_string_internal(text: &[u8], cap_num: i32) -> bool {
     let s = std::str::from_utf8(text).unwrap();
     let mut max_token = -1;
     let chars = s.chars().collect::<Vec<char>>();
@@ -786,9 +280,7 @@ extern "C" fn rure_check_rewrite_string(rewrite: *const c_char, cap_num: i32) ->
     return true;
 }
 
-#[no_mangle]
-extern "C" fn rure_rewrite_str_convert(rewrite: *const u8, length: size_t) -> *const c_char {
-    let rewrite = unsafe { slice::from_raw_parts(rewrite, length) };
+fn rure_rewrite_str_convert_internal(rewrite: &[u8]) -> *const c_char {
     let rewrite_str = std::str::from_utf8(rewrite).unwrap();
     let rewrite_chars = rewrite_str.chars().collect::<Vec<char>>();
     let mut i = 0;
@@ -826,32 +318,11 @@ extern "C" fn rure_rewrite_str_convert(rewrite: *const u8, length: size_t) -> *c
     rure_str.into_raw() as *const c_char
 }
 
-#[no_mangle]
-extern "C" fn rure_rewrite(
-    rewrite: *const u8,
-    length: size_t,
-    vecs: *const *const u8,
-    vecs_lengths: *const size_t,
+fn rure_rewrite_internal(
+    rewrite_str: &str,
     vecs_count: size_t,
+    rure_vecs: Vec<&str>,
 ) -> *const c_char {
-    // 获取rewrite
-    let rewrite = unsafe { slice::from_raw_parts(rewrite, length) };
-    let rewrite_str = std::str::from_utf8(rewrite).unwrap();
-
-    //获取vecs中的内容
-    let (raw_vecs, raw_vecsl) = unsafe {
-        (
-            slice::from_raw_parts(vecs, vecs_count),
-            slice::from_raw_parts(vecs_lengths, vecs_count),
-        )
-    };
-
-    let mut rure_vecs = Vec::with_capacity(vecs_count);
-    for (&raw_vec, &raw_vecl) in raw_vecs.iter().zip(raw_vecsl) {
-        let rure_vec = unsafe { slice::from_raw_parts(raw_vec, raw_vecl) };
-        rure_vecs.push(str::from_utf8(rure_vec).unwrap());
-    }
-
     let rewrite_chars = rewrite_str.chars().collect::<Vec<char>>();
     let mut i = 0;
     let mut out = String::new();
@@ -897,24 +368,19 @@ extern "C" fn rure_rewrite(
     out.into_raw() as *const c_char
 }
 
-#[no_mangle]
-extern "C" fn rure_replace_count(re: *const RegexUnicode, haystack: *const c_char) -> size_t {
-    let len = unsafe { CStr::from_ptr(haystack).to_bytes().len() };
-    let hay = haystack as *const u8;
+fn rure_replace_count_internal(haystack: &[u8], re: &RegexUnicode) -> size_t {
     let mut count = 0;
-    let re = unsafe { &*re };
-    let haystack = unsafe { slice::from_raw_parts(hay, len) };
     let haystack = str::from_utf8(haystack).unwrap();
     for _mat in re.find_iter(haystack) {
         count += 1;
     }
-    return count;
+    count
 }
 
 /**
- * 负责对字符集进行连接操作
- *
- */
+* Performs the concatenation (join) step for character sets.
+*
+*/
 fn connection(str: &str, vec1: Vec<String>, vec2: Vec<char>) -> Vec<String> {
     let mut vec_tmp = Vec::new();
     if str.len() > 0 {
@@ -939,10 +405,10 @@ fn connection(str: &str, vec1: Vec<String>, vec2: Vec<char>) -> Vec<String> {
 
 /**
 *  (abc123|abc|ghi789|abc1234)
-   3-abc
-   6-abc123
-   6-ghi789
-   7-abc1234
+ 3-abc
+ 6-abc123
+ 6-ghi789
+ 7-abc1234
 * abc  abc123  ghi789  abc1234
 */
 fn group_multiple_selection(str: &str, min_atoms_len: i32) -> Vec<String> {
@@ -978,11 +444,11 @@ fn group_multiple_selection(str: &str, min_atoms_len: i32) -> Vec<String> {
 }
 
 /**
- * 处理
- * a[a-c]a[zv]
- * [abc]
- * [a-c]+
- */
+* Handles patterns such as:
+* a[a-c]a[zv]
+* [abc]
+* [a-c]+
+*/
 
 fn char_class_expansion(str: &str) -> Vec<char> {
     let mut flag_connect = 0;
@@ -1165,15 +631,3 @@ fn my_compile(str: &str, min_atoms_len: i32) -> MyVec {
     std::mem::forget(a);
     MyVec { data, len }
 }
-
-#[no_mangle]
-extern "C" fn rure_filter_compile(
-    regex_str: *const u8,
-    regex_len: size_t,
-    min_atoms_len: size_t,
-) -> MyVec {
-    let r = unsafe { slice::from_raw_parts(regex_str, regex_len) };
-    let regex_str = str::from_utf8(r).unwrap();
-    let atoms = my_compile(regex_str, min_atoms_len as i32);
-    atoms
-}
-- 
Gitee