From e54fc70dc61862a3efcc5d6617eb8a78bdaf7b2f Mon Sep 17 00:00:00 2001 From: yangwentong <425822674@qq.com> Date: Wed, 30 Nov 2022 12:27:24 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4lib=5Finternal.rs=E4=B8=AD?= =?UTF-8?q?=E7=9A=84unsafe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- regex-capi/src/lib.rs | 84 +++++++++++++++++++++-- regex-capi/src/lib_internal.rs | 118 +++------------------------------ 2 files changed, 87 insertions(+), 115 deletions(-) diff --git a/regex-capi/src/lib.rs b/regex-capi/src/lib.rs index d28bae5..d7b7799 100644 --- a/regex-capi/src/lib.rs +++ b/regex-capi/src/lib.rs @@ -23,7 +23,7 @@ use std::str; use libc::{c_char, size_t}; - use regex::bytes::CaptureLocations; + use regex::{bytes, Regex}; use crate::error::{Error, ErrorKind}; @@ -148,7 +148,26 @@ return ptr::null(); }, }; - rure_compile_internal(pat, flags, options, error) + let mut builder = rure_compile_internal(pat, flags); + + if !options.is_null() { + let options = unsafe { &*options }; + builder.size_limit(options.size_limit); + builder.dfa_size_limit(options.dfa_size_limit); + } + + match builder.build() { + Ok(re) => { + let re = RegexBytes { re }; + Box::into_raw(Box::new(re)) + }, + Err(err) => unsafe { + if !error.is_null() { + *error = Error::new(ErrorKind::Regex(err)); + } + ptr::null() + }, + } } #[no_mangle] @@ -180,7 +199,14 @@ ) -> bool { let re = unsafe { &*re }; let haystack = unsafe { slice::from_raw_parts(haystack, len) }; - rure_find_internal(re, haystack, start, match_info) + re.find_at(haystack, start) + .map(|m| unsafe { + if !match_info.is_null() { + (*match_info).start = m.start(); + (*match_info).end = m.end(); + } + }) + .is_some() } #[no_mangle] @@ -270,7 +296,18 @@ match_info: *mut rure_match, ) -> bool { let locs = unsafe { &(*captures).0 }; - rure_captures_at_internal(locs, i, match_info) + match locs.pos(i) { + Some((start, end)) => { + if !match_info.is_null() { + unsafe { + (*match_info).start = start; + (*match_info).end = end; + } + } + true + } + _ => false, + } } #[no_mangle] @@ -293,7 +330,35 @@ slice::from_raw_parts(patterns_lengths, patterns_count), ) }; - rure_compile_set_internal(raw_pats, raw_patsl, patterns_count, flags, options, error) + let mut pats = Vec::with_capacity(patterns_count); + for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) { + let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) }; + pats.push(match str::from_utf8(pat) { + Ok(pat) => pat, + Err(err) => unsafe { + if !error.is_null() { + *error = Error::new(ErrorKind::Str(err)); + } + return ptr::null(); + }, + }); + } + + let mut builder = rure_compile_set_internal(pats, flags); + if !options.is_null() { + let options = unsafe { &*options }; + builder.size_limit(options.size_limit); + builder.dfa_size_limit(options.dfa_size_limit); + } + match builder.build() { + Ok(re) => Box::into_raw(Box::new(RegexSet { re })), + Err(err) => unsafe { + if !error.is_null() { + *error = Error::new(ErrorKind::Regex(err)) + } + ptr::null() + }, + } } #[no_mangle] @@ -434,7 +499,14 @@ ) -> bool { let exp = unsafe { &*re }; let haystack = unsafe { slice::from_raw_parts(haystack, len) }; - rure_consume_internal(exp, haystack, match_info) + exp.find(haystack) + .map(|m| unsafe { + if !match_info.is_null() { + (*match_info).start = m.start(); + (*match_info).end = m.end(); + } + }) + .is_some() } #[no_mangle] diff --git a/regex-capi/src/lib_internal.rs b/regex-capi/src/lib_internal.rs index cec5ec6..fb331d0 100644 --- a/regex-capi/src/lib_internal.rs +++ b/regex-capi/src/lib_internal.rs @@ -12,124 +12,35 @@ * Create: 2022-11-25 * Description: The business logic implementation layer uses pure rust. ******************************************************************************/ -fn rure_compile_internal( +use regex::bytes::RegexBuilder; +use regex::bytes::RegexSetBuilder; + fn rure_compile_internal( pat: &str, flags: u32, - options: *const Options, - error: *mut Error, -) -> *const RegexBytes { +) -> RegexBuilder { let mut builder = bytes::RegexBuilder::new(pat); - if !options.is_null() { - let options = unsafe { &*options }; - builder.size_limit(options.size_limit); - builder.dfa_size_limit(options.dfa_size_limit); - } builder.case_insensitive(flags & RURE_FLAG_CASEI > 0); builder.multi_line(flags & RURE_FLAG_MULTI > 0); builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0); builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0); builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0); builder.unicode(flags & RURE_FLAG_UNICODE > 0); - match builder.build() { - Ok(re) => { - // let mut capture_names = HashMap::new(); - // for (i, name) in re.capture_names().enumerate() { - // if let Some(name) = name { - // capture_names.insert(name.to_owned(), i as i32); - // } - // } - // let re = RegexBytes { re, capture_names }; - let re = RegexBytes { re }; - - Box::into_raw(Box::new(re)) - } - Err(err) => unsafe { - if !error.is_null() { - *error = Error::new(ErrorKind::Regex(err)); - } - ptr::null() - }, - } -} - -fn rure_find_internal( - re: &RegexBytes, - haystack: &[u8], - start: size_t, - match_info: *mut rure_match, -) -> bool { - re.find_at(haystack, start) - .map(|m| unsafe { - if !match_info.is_null() { - (*match_info).start = m.start(); - (*match_info).end = m.end(); - } - }) - .is_some() -} - -fn rure_captures_at_internal( - locs: &CaptureLocations, - i: size_t, - match_info: *mut rure_match, -) -> bool { - match locs.pos(i) { - Some((start, end)) => { - if !match_info.is_null() { - unsafe { - (*match_info).start = start; - (*match_info).end = end; - } - } - true - } - _ => false, - } + builder } fn rure_compile_set_internal( - raw_pats: &[*const u8], - raw_patsl: &[usize], - patterns_count: size_t, + pats: Vec<&str>, flags: u32, - options: *const Options, - error: *mut Error, -) -> *const RegexSet { - let mut pats = Vec::with_capacity(patterns_count); - for (&raw_pat, &raw_patl) in raw_pats.iter().zip(raw_patsl) { - let pat = unsafe { slice::from_raw_parts(raw_pat, raw_patl) }; - pats.push(match str::from_utf8(pat) { - Ok(pat) => pat, - Err(err) => unsafe { - if !error.is_null() { - *error = Error::new(ErrorKind::Str(err)); - } - return ptr::null(); - }, - }); - } - +) -> RegexSetBuilder { let mut builder = bytes::RegexSetBuilder::new(pats); - if !options.is_null() { - let options = unsafe { &*options }; - builder.size_limit(options.size_limit); - builder.dfa_size_limit(options.dfa_size_limit); - } + builder.case_insensitive(flags & RURE_FLAG_CASEI > 0); builder.multi_line(flags & RURE_FLAG_MULTI > 0); builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0); builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0); builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0); builder.unicode(flags & RURE_FLAG_UNICODE > 0); - match builder.build() { - Ok(re) => Box::into_raw(Box::new(RegexSet { re })), - Err(err) => unsafe { - if !error.is_null() { - *error = Error::new(ErrorKind::Regex(err)) - } - ptr::null() - }, - } + builder } fn rure_set_matches_internal( @@ -207,17 +118,6 @@ fn rure_new_internal(pat: &[u8]) -> *const RegexBytes { exp as *const RegexBytes } -fn rure_consume_internal(exp: &RegexBytes, haystack: &[u8], match_info: *mut rure_match) -> bool { - exp.find(haystack) - .map(|m| unsafe { - if !match_info.is_null() { - (*match_info).start = m.start(); - (*match_info).end = m.end(); - } - }) - .is_some() -} - fn rure_max_submatch_internal(text: &[u8]) -> i32 { let mut max: i32 = 0; let mut flag = 0; -- Gitee