From d054a147a5366874cb5c0fe70ae19a5fb2967ca3 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 27 Aug 2025 21:14:33 +0800 Subject: [PATCH 01/40] feat: add cpu_timecounter --- include/common_util/timecount.h | 90 +++++++++++++++++++++++---------- include/common_util/timeutil.h | 10 +++- src/common_util/timecount.cpp | 41 +++++++++++---- src/common_util/timeutil.cpp | 68 +++++++++++++++++-------- src/usage_demo/main.cpp | 4 +- src/usage_demo/timecount.hpp | 14 ++++- 6 files changed, 164 insertions(+), 63 deletions(-) diff --git a/include/common_util/timecount.h b/include/common_util/timecount.h index a98856e..35381bf 100644 --- a/include/common_util/timecount.h +++ b/include/common_util/timecount.h @@ -7,49 +7,87 @@ * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations. + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. * * @file timecount.h * @brief A simple time counter class to measure the execution time of a function. * - * Measure the execution time of a function by using the feature that a class instance object automatically destructs when it exits it's scope. + * Measure the execution time of a function by using the feature that a class instance object + * automatically destructs when it exits it's scope. * - * The time precision is microsecond,use monotone increment time, which presents the duration time between the system start time and the current time. 
+ * The time precision is microsecond,use monotone increment time, which presents the duration time + * between the system start time and the current time. * @author spencer.luo * @date 2024-05-13 */ #pragma once -#include #include +#include #include namespace cutl { +/** + * @brief A simple time counter class for measure the execution time of a function. + * + */ +class timecount +{ +public: + /** + * @brief Construct a new timecount object + * The constructor will record the begin time of the function calling. + * @param func_name + */ + timecount(const std::string& func_name); + /** + * @brief Destroy the timecount object + * The desctructor will record the end time of the function calling and calculate the execution + * time. + */ + ~timecount(); + +private: + std::string func_name_; + std::atomic start_time_; +}; + +// Rename `timecount` to `steady_timecounter` +using steady_timecounter = timecount; + +/** + * @brief A time counter class for counting the CPU usage time of a function + * + */ +class cpu_timecounter +{ +public: /** - * @brief A simple time counter class to measure the execution time of a function. - * + * @brief Construct a new cpu timecounter object + * The constructor will record the begin time of the function calling. + * @param func_name */ - class timecount - { - public: - /** - * @brief Construct a new timecount object - * The constructor will record the begin time of the function calling. - * @param func_name - */ - timecount(const std::string &func_name); - /** - * @brief Destroy the timecount object - * The desctructor will record the end time of the function calling and calculate the execution time. - */ - ~timecount(); - - private: - std::string func_name_; - std::atomic start_time_; - }; + explicit cpu_timecounter(const std::string& func_name); + + /** + * @brief Destroy the cpu timecounter object + * The desctructor will record the end time of the function calling and calculate the execution + * time. 
+ */ + ~cpu_timecounter(); + + // Disable copy and assignment + cpu_timecounter(const cpu_timecounter&) = delete; + cpu_timecounter& operator=(const cpu_timecounter&) = delete; + +private: + std::string func_name_; + std::atomic start_time_; +}; } // namespace \ No newline at end of file diff --git a/include/common_util/timeutil.h b/include/common_util/timeutil.h index 7d49c0f..8e61982 100644 --- a/include/common_util/timeutil.h +++ b/include/common_util/timeutil.h @@ -45,13 +45,21 @@ namespace cutl */ uint64_t timestamp(timeunit unit); /** - * @brief Get current clock time for monotone increment time. + * @brief Get the current time of the steady clock * * @param unit time unit * @return uint64_t clock time */ uint64_t clocktime(timeunit unit); + /** + * @brief Get the CPU time of the current process + * + * @param unit time unit + * @return uint64_t CPU time + */ + uint64_t cpu_clocktime(timeunit unit); + /** * @brief Convert time from milliseconds to seconds. * diff --git a/src/common_util/timecount.cpp b/src/common_util/timecount.cpp index b5ad9d5..2b528e7 100644 --- a/src/common_util/timecount.cpp +++ b/src/common_util/timecount.cpp @@ -7,8 +7,10 @@ * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations. + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. 
* * @file timecount.cpp * @brief @@ -17,9 +19,9 @@ */ #include "timecount.h" -#include "timeutil.h" -#include "strfmt.h" #include "inner/logger.h" +#include "strfmt.h" +#include "timeutil.h" namespace cutl { @@ -30,11 +32,28 @@ timecount::timecount(const std::string& func_name) { } - timecount::~timecount() - { - auto end_time = clocktime(timeunit::us); - auto duration = end_time - start_time_; - auto text = "[timecount] " + func_name_ + " used " + fmt_timeduration_us(duration); - CUTL_LOGGER.info("", text); - } +timecount::~timecount() +{ + auto end_time = clocktime(timeunit::us); + auto duration = end_time - start_time_; + auto text = "[timecount] " + func_name_ + " used " + fmt_timeduration_us(duration); + CUTL_LOGGER.info("", text); +} + +// 构造函数,记录开始时间 +cpu_timecounter::cpu_timecounter(const std::string& func_name) + : func_name_(func_name) + , start_time_(cpu_clocktime(timeunit::us)) +{ +} + +cpu_timecounter::~cpu_timecounter() +{ + uint64_t end_time = cpu_clocktime(timeunit::us); + uint64_t duration = end_time - start_time_; + + auto text = "[timecount] " + func_name_ + " used " + fmt_timeduration_us(duration); + CUTL_LOGGER.info("", text); +} + } // namespace \ No newline at end of file diff --git a/src/common_util/timeutil.cpp b/src/common_util/timeutil.cpp index bec178b..7cfa0e1 100644 --- a/src/common_util/timeutil.cpp +++ b/src/common_util/timeutil.cpp @@ -18,15 +18,16 @@ #include "timeutil.h" #include - +#if defined(_WIN32) +#include +#else +#include +#include +#endif namespace cutl { - uint64_t timestamp(timeunit unit) + uint64_t get_time_by_unit(uint64_t us, timeunit unit) { - // for C++11 and later - auto now = std::chrono::system_clock::now(); - auto timestamp_ms = std::chrono::duration_cast(now.time_since_epoch()).count(); - auto us = static_cast(timestamp_ms); uint64_t t = 0; switch (unit) { @@ -45,6 +46,44 @@ namespace cutl return t; } + uint64_t timestamp(timeunit unit) + { + // for C++11 and later + auto now = std::chrono::system_clock::now(); 
+ auto timestamp_ms = std::chrono::duration_cast(now.time_since_epoch()).count(); + auto us = static_cast(timestamp_ms); + + return get_time_by_unit(us, unit); + } + + uint64_t cpu_clocktime(timeunit unit) + { +#if defined(_WIN32) + FILETIME createTime, exitTime, kernelTime, userTime; + if (GetProcessTimes(GetCurrentProcess(), &createTime, &exitTime, &kernelTime, &userTime)) { + // 将 FILETIME 转换为 64 位整数 + ULARGE_INTEGER user, kernel; + user.LowPart = userTime.dwLowDateTime; + user.HighPart = userTime.dwHighDateTime; + kernel.LowPart = kernelTime.dwLowDateTime; + kernel.HighPart = kernelTime.dwHighDateTime; + + // 转换为微秒 (100 纳秒单位 -> 微秒) + return (user.QuadPart + kernel.QuadPart) / 10; + } + return 0; +#else + struct rusage usage; + getrusage(RUSAGE_SELF, &usage); + + // 用户时间 + 系统时间,转换为微秒 + uint64_t user_us = static_cast(usage.ru_utime.tv_sec) * 1000000 + usage.ru_utime.tv_usec; + uint64_t system_us = static_cast(usage.ru_stime.tv_sec) * 1000000 + usage.ru_stime.tv_usec; + + return user_us + system_us; +#endif + } + uint64_t clocktime(timeunit unit) { // for C++11 and later @@ -52,22 +91,7 @@ namespace cutl auto run_time_duration = std::chrono::duration_cast(run_time.time_since_epoch()).count(); auto us = static_cast(run_time_duration); - uint64_t t = 0; - switch (unit) - { - case timeunit::s: - t = us2s(us); - break; - case timeunit::ms: - t = us2ms(us); - break; - case timeunit::us: - t = us; - break; - default: - break; - } - return t; + return get_time_by_unit(us, unit); } constexpr static int THOUSAND = 1000; diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index de48683..2371291 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) // TestSysutil(); // TestStrfmt(); // TestTimeutil(); - // TestTimecount(); + TestTimecount(); // TestDatetime(); // TestVerUtil(); // TestStrUtil(); @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) // TestTimer(); // TestLRUCache(); // TestThreadUtil(); 
- TestEventLoop(); + // TestEventLoop(); // TestThreadPool(); // TestAlgorithmUtil(); diff --git a/src/usage_demo/timecount.hpp b/src/usage_demo/timecount.hpp index c93a66f..ad577f9 100644 --- a/src/usage_demo/timecount.hpp +++ b/src/usage_demo/timecount.hpp @@ -2,6 +2,7 @@ #include "common.hpp" #include "common_util/timecount.h" +#include #include #include #include @@ -129,8 +130,19 @@ void TimecountUsage() PrintTitle("TimecountUsage"); cutl::timecount tcount(__func__); + // 统计CPU占用时间,不包含休眠或等待时间 + cutl::cpu_timecounter cpu_counter("CPU_Timer_Count"); std::cout << "TestTimecount begin" << std::endl; + + // 执行一些计算密集型操作 + std::vector numbers(1000000); + for (size_t i = 0; i < numbers.size(); ++i) + { + numbers[i] = std::sin(i) * std::cos(i); + } + // 休眠1s std::this_thread::sleep_for(std::chrono::seconds(1)); + std::cout << "TestTimecount end" << std::endl; } @@ -139,5 +151,5 @@ void TestTimecount() PrintTitle("timecount"); TimecountUsage(); - TestGetFromContainer(); + // TestGetFromContainer(); } -- Gitee From eb5f429e7cd7197e47d070bee6b75eaad9ee82a9 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 27 Aug 2025 22:53:48 +0800 Subject: [PATCH 02/40] fix: timecount.h --- include/common_util/timecount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/common_util/timecount.h b/include/common_util/timecount.h index 35381bf..e39dda1 100644 --- a/include/common_util/timecount.h +++ b/include/common_util/timecount.h @@ -57,7 +57,8 @@ private: std::atomic start_time_; }; -// Rename `timecount` to `steady_timecounter` +// Rename `timecount` to `steady_timecounter`, In order to be compatible with older versions, +// retain the original name. 
using steady_timecounter = timecount; /** -- Gitee From 51ebdb22620d0a05b93ad876cc881d8c34d59403 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 31 Aug 2025 17:02:20 +0800 Subject: [PATCH 03/40] fix: print.h --- include/common_util/print.h | 112 ++++++++++++++++++++++++++++++++++++ src/usage_demo/main.cpp | 4 +- src/usage_demo/print.hpp | 50 ++++++++++++++++ 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index 1446b29..2741ae0 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include #include @@ -207,4 +208,115 @@ void print_unordered_map(const std::unordered_map& mp, bool format = false } } +/** + * @brief Print int matrix, support int8_t, int16_t, int32_t, int64_t + * + * @tparam T data type + * @param matrix + * @param rows + * @param cols + * @param name + * @param startY + * @param startX + * @param h + * @param w + */ +template +void print_int_matrix(const T* matrix, + uint32_t rows, + uint32_t cols, + const std::string& name, + uint32_t startY = 0, + uint32_t startX = 0, + uint32_t h = 5, + uint32_t w = 5) +{ + uint32_t H = rows - startY; + if (H <= 0) + { + std::cout << "startY should less than rows:" << rows << std::endl; + return; + } + uint32_t W = cols - startX; + if (W <= 0) + { + std::cout << "startY should less than rows:" << rows << std::endl; + return; + } + H = std::min(H, h); + W = std::min(W, w); + + // 打印矩阵 + std::cout << "The sub matrix of " << name << ", from (" << startY << ", " << startX + << "), size: " << h << " * " << w << std::endl; + for (uint32_t i = startY; i < startY + H; i++) + { + for (uint32_t j = startX; j < startX + W; j++) + { + std::cout << (int)(matrix[i * cols + j]) << " "; + } + std::cout << std::endl; + } +} + +/** + * @brief Print float matrix, support float or double data type. 
+ * + * @tparam T data type + * @param matrix matrix pointer + * @param rows + * @param cols + * @param name + * @param precision + * @param startY + * @param startX + * @param h + * @param w + */ +template +void print_float_matrix(const T* matrix, + uint32_t rows, + uint32_t cols, + const std::string& name, + uint32_t precision = 2, + uint32_t startY = 0, + uint32_t startX = 0, + uint32_t h = 5, + uint32_t w = 5) +{ + uint32_t H = rows - startY; + if (H <= 0) + { + std::cout << "startY should less than rows:" << rows << std::endl; + return; + } + uint32_t W = cols - startX; + if (W <= 0) + { + std::cout << "startY should less than rows:" << rows << std::endl; + return; + } + H = std::min(H, h); + W = std::min(W, w); + + // 保存原始cout设置 + std::ios old_state(nullptr); + old_state.copyfmt(std::cout); + // 设置输出格式:固定小数点和指定精度 + std::cout << std::fixed << std::setprecision(precision); + // 打印矩阵 + std::cout << "The sub matrix of " << name << ", from (" << startY << ", " << startX + << "), size: " << h << " * " << w << std::endl; + for (uint32_t i = startY; i < startY + H; i++) + { + for (uint32_t j = startX; j < startX + W; j++) + { + std::cout << matrix[i * cols + j] << " "; + } + std::cout << std::endl; + } + // 恢复原始cout设置 + std::cout.copyfmt(old_state); +} + } // namespace cutl diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 2371291..598adc5 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -39,14 +39,14 @@ int main(int argc, char *argv[]) // TestSysutil(); // TestStrfmt(); // TestTimeutil(); - TestTimecount(); + // TestTimecount(); // TestDatetime(); // TestVerUtil(); // TestStrUtil(); // TestFilePath(); // TestFileUtil(); // TestDlLoader(); - // TestPrint(); + TestPrint(); // TestTimer(); // TestLRUCache(); // TestThreadUtil(); diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 193a19f..a4f4c2c 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -70,6 +70,54 @@ void PrintColor() std::cout 
<< "norman: Hello World" << std::endl; } +// (用随机数)初始化矩阵 +void initializeMatrix(float* matrix, int rows, int cols) +{ + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + matrix[i * cols + j] = (static_cast(rand()) / RAND_MAX) * 10; + } + } +} + +// (用随机数)初始化矩阵 +void initializeMatrix(int8_t* matrix, int rows, int cols) +{ + for (int i = 0; i < rows; i++) + { + for (int j = 0; j < cols; j++) + { + matrix[i * cols + j] = i * cols + j; + } + } +} + +void PrintIntMatrix() +{ + int M = 10; + int N = 12; + int8_t matrix[M * N] = { 0 }; + // 初始化矩阵 + initializeMatrix(matrix, M, N); + // 打印矩阵 + cutl::print_int_matrix(matrix, M, N, "int matrix"); + cutl::print_int_matrix(matrix, M, N, "int matrix", 5, 5, M - 5, N - 5); +} + +void PrintFloatMatrix() +{ + int M = 10; + int N = 12; + float matrix[M * N] = { 0 }; + // 初始化矩阵 + initializeMatrix(matrix, M, N); + // 打印矩阵 + cutl::print_float_matrix(matrix, M, N, "float matrix"); + cutl::print_float_matrix(matrix, M, N, "float matrix", 1, 6, 8, M - 6, N - 8); +} + void TestPrint() { PrintTitle("print"); @@ -79,4 +127,6 @@ void TestPrint() PrintVec(); PrintMap(); PrintUnorderedMap(); + PrintIntMatrix(); + PrintFloatMatrix(); } -- Gitee From 9f686843b9edcb1931f0be9ddabc819ada14425c Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 31 Aug 2025 19:27:18 +0800 Subject: [PATCH 04/40] fix: print_matrix for print.h --- include/common_util/print.h | 87 ++++++++++--------------------------- src/usage_demo/print.hpp | 8 ++-- 2 files changed, 27 insertions(+), 68 deletions(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index 2741ae0..d01f64a 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -209,80 +209,31 @@ void print_unordered_map(const std::unordered_map& mp, bool format = false } /** - * @brief Print int matrix, support int8_t, int16_t, int32_t, int64_t - * - * @tparam T data type - * @param matrix - * @param rows - * @param cols - * @param name - * @param startY - 
* @param startX - * @param h - * @param w - */ -template -void print_int_matrix(const T* matrix, - uint32_t rows, - uint32_t cols, - const std::string& name, - uint32_t startY = 0, - uint32_t startX = 0, - uint32_t h = 5, - uint32_t w = 5) -{ - uint32_t H = rows - startY; - if (H <= 0) - { - std::cout << "startY should less than rows:" << rows << std::endl; - return; - } - uint32_t W = cols - startX; - if (W <= 0) - { - std::cout << "startY should less than rows:" << rows << std::endl; - return; - } - H = std::min(H, h); - W = std::min(W, w); - - // 打印矩阵 - std::cout << "The sub matrix of " << name << ", from (" << startY << ", " << startX - << "), size: " << h << " * " << w << std::endl; - for (uint32_t i = startY; i < startY + H; i++) - { - for (uint32_t j = startX; j < startX + W; j++) - { - std::cout << (int)(matrix[i * cols + j]) << " "; - } - std::cout << std::endl; - } -} - -/** - * @brief Print float matrix, support float or double data type. + * @brief Print a matrix, Matrices supporting the following parameters: float, double, int8_t, + * int16_t, int32_t, int64_t. * * @tparam T data type * @param matrix matrix pointer * @param rows * @param cols - * @param name - * @param precision + * @param name the name of matrix + * @param precision The number of decimal places to be retained. When printing an integer matrix, + * set this parameter to 0. 
* @param startY * @param startX * @param h * @param w */ template -void print_float_matrix(const T* matrix, - uint32_t rows, - uint32_t cols, - const std::string& name, - uint32_t precision = 2, - uint32_t startY = 0, - uint32_t startX = 0, - uint32_t h = 5, - uint32_t w = 5) +void print_matrix(const T* matrix, + uint32_t rows, + uint32_t cols, + const std::string& name, + uint32_t precision = 2, + uint32_t startY = 0, + uint32_t startX = 0, + uint32_t h = 5, + uint32_t w = 5) { uint32_t H = rows - startY; if (H <= 0) @@ -311,7 +262,15 @@ void print_float_matrix(const T* matrix, { for (uint32_t j = startX; j < startX + W; j++) { - std::cout << matrix[i * cols + j] << " "; + if (precision == 0) + { + // 整数矩阵 + std::cout << (int)(matrix[i * cols + j]) << " "; + } + else + { + std::cout << matrix[i * cols + j] << " "; + } } std::cout << std::endl; } diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index a4f4c2c..883cc97 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -102,8 +102,8 @@ void PrintIntMatrix() // 初始化矩阵 initializeMatrix(matrix, M, N); // 打印矩阵 - cutl::print_int_matrix(matrix, M, N, "int matrix"); - cutl::print_int_matrix(matrix, M, N, "int matrix", 5, 5, M - 5, N - 5); + cutl::print_matrix(matrix, M, N, "int matrix", 0); + cutl::print_matrix(matrix, M, N, "int matrix", 0, 5, 5, M - 5, N - 5); } void PrintFloatMatrix() @@ -114,8 +114,8 @@ void PrintFloatMatrix() // 初始化矩阵 initializeMatrix(matrix, M, N); // 打印矩阵 - cutl::print_float_matrix(matrix, M, N, "float matrix"); - cutl::print_float_matrix(matrix, M, N, "float matrix", 1, 6, 8, M - 6, N - 8); + cutl::print_matrix(matrix, M, N, "float matrix"); + cutl::print_matrix(matrix, M, N, "float matrix", 1, 6, 8, M - 6, N - 8); } void TestPrint() -- Gitee From b0ad6100c15fda99f49498263e95085137b36c80 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 31 Aug 2025 21:05:37 +0800 Subject: [PATCH 05/40] fix: print.h --- include/common_util/print.h | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index d01f64a..8005f11 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -264,7 +264,7 @@ void print_matrix(const T* matrix, { if (precision == 0) { - // 整数矩阵 + // 整数矩阵,这里强制类型转换,否则int8_t类型会被当成char打印字符 std::cout << (int)(matrix[i * cols + j]) << " "; } else -- Gitee From 49bae4e07520d99ff810b67dbacc1546583a03e9 Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 12 Sep 2025 10:27:10 +0800 Subject: [PATCH 06/40] fix: get_last_modified_time_s --- script/build.bat | 3 ++- src/common_util/inner/filesystem_win.cpp | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/script/build.bat b/script/build.bat index 9b50d0c..60d1068 100644 --- a/script/build.bat +++ b/script/build.bat @@ -87,7 +87,8 @@ if %~1==build ( ) @REM TODO: 指定cmake的生成器,用户可以替换成自己安装的对应Visual Studio的版本 -set generator="Visual Studio 14 2015" +@REM set generator="Visual Studio 14 2015" +set generator="Visual Studio 17 2022" @REM TODO: 指定CPU核数,用户可以根据自己电脑的CPU核数来调整设置这个参数。使用的cpu核数越多,编译速度越快。 set cpu_cores=6 diff --git a/src/common_util/inner/filesystem_win.cpp b/src/common_util/inner/filesystem_win.cpp index 21d7d25..0b63260 100644 --- a/src/common_util/inner/filesystem_win.cpp +++ b/src/common_util/inner/filesystem_win.cpp @@ -445,9 +445,6 @@ namespace cutl uint64_t get_last_modified_time_s(const std::string& filepath) { std::wstring wide_path = s2ws(filepath); - - // Windows实现 - std::wstring wide_path = utf8_to_wide(path); WIN32_FILE_ATTRIBUTE_DATA fileData; if (!GetFileAttributesExW(wide_path.c_str(), GetFileExInfoStandard, &fileData)) -- Gitee From 1b5bbb70dcded8e1b8702fd28a4631e0045f542a Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 12 Sep 2025 10:30:56 +0800 Subject: [PATCH 07/40] fix: print.hpp --- src/usage_demo/print.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 
883cc97..14fe702 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -96,8 +96,8 @@ void initializeMatrix(int8_t* matrix, int rows, int cols) void PrintIntMatrix() { - int M = 10; - int N = 12; + constexpr int M = 10; + constexpr int N = 12; int8_t matrix[M * N] = { 0 }; // 初始化矩阵 initializeMatrix(matrix, M, N); @@ -108,8 +108,8 @@ void PrintIntMatrix() void PrintFloatMatrix() { - int M = 10; - int N = 12; + constexpr int M = 10; + constexpr int N = 12; float matrix[M * N] = { 0 }; // 初始化矩阵 initializeMatrix(matrix, M, N); -- Gitee From 1502f630a90f011c65c534d72ff33ca3acc93398 Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 12 Sep 2025 11:33:11 +0800 Subject: [PATCH 08/40] fix: get_last_modified_time_s --- src/common_util/inner/filesystem_unix.cpp | 2 +- src/usage_demo/fileutil.hpp | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/common_util/inner/filesystem_unix.cpp b/src/common_util/inner/filesystem_unix.cpp index d354359..bf02bb3 100644 --- a/src/common_util/inner/filesystem_unix.cpp +++ b/src/common_util/inner/filesystem_unix.cpp @@ -440,7 +440,7 @@ namespace cutl } // 时间精确到秒 - return static_cast(st.st_mtim.tv_sec); + return static_cast(st.st_mtime); } } // namespace cutl diff --git a/src/usage_demo/fileutil.hpp b/src/usage_demo/fileutil.hpp index f8294fd..a12b5db 100644 --- a/src/usage_demo/fileutil.hpp +++ b/src/usage_demo/fileutil.hpp @@ -284,17 +284,25 @@ void test_rename_and_property() cutl::createdir(basedir, true); // ./test_dir/file_01.txt auto file_01 = basedir.join("file_01.txt"); - cutl::writetext(file_01, "Hello, This is a test file."); + cutl::createfile(file_01); + auto modified_time = cutl::last_modified_time(file_01); - std::cout << "modified time: " << cutl::fmt_timestamp_s(modified_time) << std::endl; + std::cout << "modified time 1(create): " << cutl::fmt_timestamp_s(modified_time) << std::endl; + + // write content + std::this_thread::sleep_for(std::chrono::seconds(3)); + 
cutl::writetext(file_01, "Hello, This is a test file."); - std::this_thread::sleep_for(std::chrono::seconds(10)); + modified_time = cutl::last_modified_time(file_01); + std::cout << "modified time 2(modify): " << cutl::fmt_timestamp_s(modified_time) << std::endl; + // rename file (文件重命名,不会导致文件最近修改时间的变化) + std::this_thread::sleep_for(std::chrono::seconds(3)); auto file_02 = basedir.join("file_02.txt"); cutl::renamefile(file_01, file_02); modified_time = cutl::last_modified_time(file_02); - std::cout << "modified time: " << cutl::fmt_timestamp_s(modified_time) << std::endl; + std::cout << "modified time 3(rename): " << cutl::fmt_timestamp_s(modified_time) << std::endl; } void TestFileUtil() -- Gitee From ef066d31d01547652c1090781637c62b23a17181 Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 12 Sep 2025 17:51:53 +0800 Subject: [PATCH 09/40] fix: print.h --- include/common_util/print.h | 129 ++----------------- include/common_util/strfmt.h | 212 +++++++++++++++++++++++++++++++- include/common_util/timecount.h | 14 +-- src/common_util/timecount.cpp | 8 +- src/usage_demo/main.cpp | 4 +- src/usage_demo/print.hpp | 4 + 6 files changed, 234 insertions(+), 137 deletions(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index 8005f11..1f22cd8 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -19,13 +19,10 @@ */ #pragma once +#include "strfmt.h" #include -#include #include -#include #include -#include -#include namespace cutl { @@ -91,18 +88,7 @@ void print_success(const std::string& str); template void print_arr(T* arr, uint32_t size) { - if (size <= 0 || arr == nullptr) - { - std::cout << "[]" << std::endl; - return; - } - std::cout << "[" << std::to_string(arr[0]); - for (uint32_t i = 1; i < size; i++) - { - std::cout << ", " << std::to_string(arr[i]); - } - - std::cout << "]" << std::endl; + std::cout << fmt_arr(arr, size) << std::endl; } /** @@ -114,18 +100,7 @@ void print_arr(T* arr, uint32_t size) template void 
print_vec(const std::vector& vec) { - if (vec.empty()) - { - std::cout << "[]" << std::endl; - return; - } - std::cout << "[" << std::to_string(vec[0]); - for (int i = 1; i < vec.size(); i++) - { - std::cout << ", " << std::to_string(vec[i]); - } - - std::cout << "]" << std::endl; + std::cout << fmt_vec(vec) << std::endl; } /** @@ -140,32 +115,7 @@ void print_vec(const std::vector& vec) template void print_map(const std::map& mp, bool format = false) { - if (mp.empty()) - { - std::cout << "{}" << std::endl; - return; - } - if (format) - { - std::cout << "{" << std::endl; - for (auto it = mp.begin(); it != mp.end(); it++) - { - std::cout << " " << it->first << ": " << it->second << "," << std::endl; - } - std::cout << "}" << std::endl; - } - else - { - std::cout << "{"; - auto it_begin = mp.begin(); - std::cout << it_begin->first << ": " << it_begin->second; - it_begin++; - for (auto it = it_begin; it != mp.end(); it++) - { - std::cout << ", " << it->first << ": " << it->second; - } - std::cout << "}" << std::endl; - } + std::cout << fmt_map(mp, format) << std::endl; } /** @@ -180,32 +130,8 @@ void print_map(const std::map& mp, bool format = false) template void print_unordered_map(const std::unordered_map& mp, bool format = false) { - if (mp.empty()) - { - std::cout << "{}" << std::endl; - return; - } - if (format) - { - std::cout << "{" << std::endl; - for (auto it = mp.begin(); it != mp.end(); it++) - { - std::cout << " " << it->first << ": " << it->second << "," << std::endl; - } - std::cout << "}" << std::endl; - } - else - { - std::cout << "{"; - auto it_begin = mp.begin(); - std::cout << it_begin->first << ": " << it_begin->second; - it_begin++; - for (auto it = it_begin; it != mp.end(); it++) - { - std::cout << ", " << it->first << ": " << it->second; - } - std::cout << "}" << std::endl; - } + + std::cout << fmt_unordered_map(mp, format) << std::endl; } /** @@ -235,47 +161,8 @@ void print_matrix(const T* matrix, uint32_t h = 5, uint32_t w = 5) { - uint32_t H 
= rows - startY; - if (H <= 0) - { - std::cout << "startY should less than rows:" << rows << std::endl; - return; - } - uint32_t W = cols - startX; - if (W <= 0) - { - std::cout << "startY should less than rows:" << rows << std::endl; - return; - } - H = std::min(H, h); - W = std::min(W, w); - - // 保存原始cout设置 - std::ios old_state(nullptr); - old_state.copyfmt(std::cout); - // 设置输出格式:固定小数点和指定精度 - std::cout << std::fixed << std::setprecision(precision); - // 打印矩阵 - std::cout << "The sub matrix of " << name << ", from (" << startY << ", " << startX - << "), size: " << h << " * " << w << std::endl; - for (uint32_t i = startY; i < startY + H; i++) - { - for (uint32_t j = startX; j < startX + W; j++) - { - if (precision == 0) - { - // 整数矩阵,这里强制类型转换,否则int8_t类型会被当成char打印字符 - std::cout << (int)(matrix[i * cols + j]) << " "; - } - else - { - std::cout << matrix[i * cols + j] << " "; - } - } - std::cout << std::endl; - } - // 恢复原始cout设置 - std::cout.copyfmt(old_state); + std::cout << fmt_matrix(matrix, rows, cols, name, precision, startY, startX, h, w) + << std::endl; } } // namespace cutl diff --git a/include/common_util/strfmt.h b/include/common_util/strfmt.h index bc21165..3a4f734 100644 --- a/include/common_util/strfmt.h +++ b/include/common_util/strfmt.h @@ -18,11 +18,14 @@ #pragma once -#include +#include "timeutil.h" #include -#include #include -#include "timeutil.h" +#include +#include +#include +#include +#include namespace cutl { @@ -205,4 +208,207 @@ namespace cutl */ std::string to_bin(uint64_t value, char separator = ' '); + /** + * @brief Format array to string with basic data type, such as int, float, double, char, etc. 
+ * + * @tparam T the data type of array's element + * @param arr array + * @param size size of array + */ + template + std::string fmt_arr(T* arr, uint32_t size) + { + if (size <= 0 || arr == nullptr) + { + return "[]"; + } + std::string res("[" + std::to_string(arr[0])); + for (uint32_t i = 1; i < size; i++) + { + res += ", " + std::to_string(arr[i]); + } + + res += "]"; + return res; + } + + /** + * @brief Format simple vector to string with basic data type, such as int, float, double, + * char, string etc. + * + * @tparam T the data type of vector's element + * @param vec vector's object + */ + template + std::string fmt_vec(const std::vector& vec) + { + if (vec.empty()) + { + return "[]"; + } + + std::string res("[" + std::to_string(vec[0])); + for (int i = 1; i < vec.size(); i++) + { + res += ", " + std::to_string(vec[i]); + } + res += "]"; + + return res; + } + + /** + * @brief Format simple map to string with basic data type, such as int, float, double, char, + string etc. + * + * @tparam K the data type of key in map + * @tparam V the data type of value in map + * @param mp map's object + * @param format wheather format the output or not, if true, use multi-line output, + otherwise, + * use one-line output + */ + template + std::string fmt_map(const std::map& mp, bool format = false) + { + if (mp.empty()) + { + return "{}"; + } + + std::stringstream ss; + if (format) + { + ss << "{\n"; + for (auto it = mp.begin(); it != mp.end(); it++) + { + ss << " " << it->first << ": " << it->second << ",\n"; + } + } + else + { + ss << "{"; + auto it_begin = mp.begin(); + ss << it_begin->first << ": " << it_begin->second; + it_begin++; + for (auto it = it_begin; it != mp.end(); it++) + { + ss << ", " << it->first << ": " << it->second; + } + } + ss << "}"; + + return ss.str(); + } + + /** + * @brief Format simple unordered_map to string with basic data type, such as int, float, + double, char, string etc. 
+ * + * @tparam K the data type of key in map + * @tparam V the data type of value in map + * @param mp map's object + * @param format wheather format the output or not, if true, use multi-line output, + otherwise, + * use one-line output + */ + template + std::string fmt_unordered_map(const std::unordered_map& mp, bool format = false) + { + if (mp.empty()) + { + return "{}"; + } + + std::stringstream ss; + if (format) + { + ss << "{\n"; + for (auto it = mp.begin(); it != mp.end(); it++) + { + ss << " " << it->first << ": " << it->second << ",\n"; + } + } + else + { + ss << "{"; + auto it_begin = mp.begin(); + ss << it_begin->first << ": " << it_begin->second; + it_begin++; + for (auto it = it_begin; it != mp.end(); it++) + { + ss << ", " << it->first << ": " << it->second; + } + } + ss << "}"; + + return ss.str(); + } + + /** + * @brief Format a matrix to string, Matrices supporting the following parameters: float, + double, int8_t, int16_t, int32_t, int64_t. + * + * @tparam T data type + * @param matrix matrix pointer + * @param rows + * @param cols + * @param name the name of matrix + * @param precision The number of decimal places to be retained. When printing an integer + * matrix, set this parameter to 0. 
+ * @param startY + * @param startX + * @param h + * @param w + */ + template + std::string fmt_matrix(const T* matrix, + uint32_t rows, + uint32_t cols, + const std::string& name, + uint32_t precision = 2, + uint32_t startY = 0, + uint32_t startX = 0, + uint32_t h = 5, + uint32_t w = 5) + { + uint32_t H = rows - startY; + if (H <= 0) + { + return "startY should less than rows:" + rows; + } + uint32_t W = cols - startX; + if (W <= 0) + { + return "startX should less than cols:" + cols; + } + H = std::min(H, h); + W = std::min(W, w); + + std::stringstream ss; + // 设置输出格式:固定小数点和指定精度 + ss << std::fixed << std::setprecision(precision); + // 打印矩阵 + ss << "The sub matrix of " << name << ", start: (" << startY << ", " << startX + << "), size: " << h << " * " << w << "\n"; + for (uint32_t i = startY; i < startY + H; i++) + { + for (uint32_t j = startX; j < startX + W; j++) + { + if (precision == 0) + { + // 整数矩阵,这里强制类型转换,否则int8_t类型会被当成char打印字符 + ss << (int)(matrix[i * cols + j]) << " "; + } + else + { + ss << matrix[i * cols + j] << " "; + } + } + ss << "\n"; + } + + return ss.str(); + } + } // namespace \ No newline at end of file diff --git a/include/common_util/timecount.h b/include/common_util/timecount.h index e39dda1..a8d2bdd 100644 --- a/include/common_util/timecount.h +++ b/include/common_util/timecount.h @@ -36,30 +36,30 @@ namespace cutl * @brief A simple time counter class for measure the execution time of a function. * */ -class timecount +class steady_timecounter { public: /** - * @brief Construct a new timecount object + * @brief Construct a new steady_timecounter object * The constructor will record the begin time of the function calling. * @param func_name */ - timecount(const std::string& func_name); + steady_timecounter(const std::string& func_name); /** - * @brief Destroy the timecount object + * @brief Destroy the steady_timecounter object * The desctructor will record the end time of the function calling and calculate the execution * time. 
*/ - ~timecount(); + ~steady_timecounter(); private: std::string func_name_; std::atomic start_time_; }; -// Rename `timecount` to `steady_timecounter`, In order to be compatible with older versions, +// Rename `steady_timecounter` to `timecount`, In order to be compatible with older versions, // retain the original name. -using steady_timecounter = timecount; +using timecount = steady_timecounter; /** * @brief A time counter class for counting the CPU usage time of a function diff --git a/src/common_util/timecount.cpp b/src/common_util/timecount.cpp index 2b528e7..a7dcaed 100644 --- a/src/common_util/timecount.cpp +++ b/src/common_util/timecount.cpp @@ -26,17 +26,17 @@ namespace cutl { -timecount::timecount(const std::string& func_name) +steady_timecounter::steady_timecounter(const std::string& func_name) : func_name_(func_name) , start_time_(clocktime(timeunit::us)) { } -timecount::~timecount() +steady_timecounter::~steady_timecounter() { auto end_time = clocktime(timeunit::us); auto duration = end_time - start_time_; - auto text = "[timecount] " + func_name_ + " used " + fmt_timeduration_us(duration); + auto text = "[timecounter] " + func_name_ + " used " + fmt_timeduration_us(duration); CUTL_LOGGER.info("", text); } @@ -52,7 +52,7 @@ cpu_timecounter::~cpu_timecounter() uint64_t end_time = cpu_clocktime(timeunit::us); uint64_t duration = end_time - start_time_; - auto text = "[timecount] " + func_name_ + " used " + fmt_timeduration_us(duration); + auto text = "[timecounter] " + func_name_ + " used " + fmt_timeduration_us(duration); CUTL_LOGGER.info("", text); } diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 598adc5..5196311 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -31,7 +31,7 @@ void usage_demo() std::cout << "current time(local time): " << now.format() << std::endl; } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { LibraryInit(); // TestSingleton(); @@ -39,7 +39,7 @@ int main(int argc, 
char *argv[]) // TestSysutil(); // TestStrfmt(); // TestTimeutil(); - // TestTimecount(); + TestTimecount(); // TestDatetime(); // TestVerUtil(); // TestStrUtil(); diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 14fe702..88b3136 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -96,6 +96,8 @@ void initializeMatrix(int8_t* matrix, int rows, int cols) void PrintIntMatrix() { + PrintSubTitle("print_int_matrix"); + constexpr int M = 10; constexpr int N = 12; int8_t matrix[M * N] = { 0 }; @@ -108,6 +110,8 @@ void PrintIntMatrix() void PrintFloatMatrix() { + PrintSubTitle("print_float_matrix"); + constexpr int M = 10; constexpr int N = 12; float matrix[M * N] = { 0 }; -- Gitee From 410f0dd7c8062e939c4dec294b5817b39b5f4844 Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 12 Sep 2025 21:04:59 +0800 Subject: [PATCH 10/40] feat: add fmt_arr --- include/common_util/print.h | 15 +++++++++++++++ include/common_util/strfmt.h | 29 +++++++++++++++++++++++++++++ src/usage_demo/print.hpp | 7 +++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index 1f22cd8..97bbf69 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -91,6 +91,21 @@ void print_arr(T* arr, uint32_t size) std::cout << fmt_arr(arr, size) << std::endl; } +/** + * @brief Print array to string. support basic data type, such as int, float, double, char, + * etc. + * + * @tparam T the data type of array's element + * @tparam _Nm + * @param arr array + * @return std::string + */ +template +void print_arr(const std::array& arr) +{ + std::cout << fmt_arr(arr) << std::endl; +} + /** * @brief Print simple vector with basic data type, such as int, float, double, char, string etc. 
* diff --git a/include/common_util/strfmt.h b/include/common_util/strfmt.h index 3a4f734..4f18fd8 100644 --- a/include/common_util/strfmt.h +++ b/include/common_util/strfmt.h @@ -19,6 +19,7 @@ #pragma once #include "timeutil.h" +#include #include #include #include @@ -222,13 +223,41 @@ namespace cutl { return "[]"; } + std::string res("[" + std::to_string(arr[0])); for (uint32_t i = 1; i < size; i++) { res += ", " + std::to_string(arr[i]); } + res += "]"; + + return res; + } + /** + * @brief Format array to string. support basic data type, such as int, float, double, char, + * etc. + * + * @tparam T the data type of array's element + * @tparam _Nm + * @param arr array + * @return std::string + */ + template + std::string fmt_arr(const std::array& arr) + { + if (arr.empty()) + { + return "[]"; + } + + std::string res("[" + std::to_string(arr[0])); + for (uint32_t i = 1; i < arr.size(); i++) + { + res += ", " + std::to_string(arr[i]); + } res += "]"; + return res; } diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 88b3136..9c31866 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -5,8 +5,11 @@ void PrintArr() { PrintSubTitle("print_arr"); - int8_t arr[] = { 1, -2, 3 }; - cutl::print_arr(arr, 3); + int8_t arr1[] = { 1, -2, 3 }; + cutl::print_arr(arr1, 3); + + std::array arr2 = { 1.21, 3.14, 5.22, 6.33 }; + cutl::print_arr(arr2); } void PrintVec() -- Gitee From 156c6f02020764d9d440fa48a09bb77a8d4a4110 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 13 Sep 2025 21:27:42 +0800 Subject: [PATCH 11/40] feat: add fmt_set/fmt_unordered_set --- include/common_util/print.h | 25 ++++++++++++++++ include/common_util/strfmt.h | 55 ++++++++++++++++++++++++++++++++++++ src/usage_demo/print.hpp | 20 ++++++++++++- 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/include/common_util/print.h b/include/common_util/print.h index 97bbf69..4f994b1 100644 --- a/include/common_util/print.h +++ b/include/common_util/print.h @@ -149,6 
+149,31 @@ void print_unordered_map(const std::unordered_map& mp, bool format = false std::cout << fmt_unordered_map(mp, format) << std::endl; } +/** + * @brief Print simple set with basic data type, such as int, float, double, char, string etc. + * + * @tparam T the data type of set's element + * @param s std::set's object + */ +template +void print_set(const std::set& s) +{ + std::cout << fmt_set(s) << std::endl; +} + +/** + * @brief Print simple unordered_set with basic data type, such as int, float, double, char, string + * etc. + * + * @tparam T the data type of set's element + * @param s std::set's object + */ +template +void print_unordered_set(const std::unordered_set& s) +{ + std::cout << fmt_unordered_set(s) << std::endl; +} + /** * @brief Print a matrix, Matrices supporting the following parameters: float, double, int8_t, * int16_t, int32_t, int64_t. diff --git a/include/common_util/strfmt.h b/include/common_util/strfmt.h index 4f18fd8..7b0ef9b 100644 --- a/include/common_util/strfmt.h +++ b/include/common_util/strfmt.h @@ -23,9 +23,11 @@ #include #include #include +#include #include #include #include +#include #include namespace cutl @@ -374,6 +376,59 @@ namespace cutl return ss.str(); } + /** + * @brief Format simple set with basic data type, such as int, float, double, char, string etc. + * + * @tparam T the data type of set's element + * @param s std::set's object + */ + template + std::string fmt_set(const std::set& s) + { + if (s.empty()) + { + return "{}"; + } + + auto itr = s.begin(); + std::string res("{" + std::to_string(*itr)); + itr++; + for (; itr != s.end(); itr++) + { + res += ", " + std::to_string(*itr); + } + res += "}"; + + return res; + } + + /** + * @brief Format simple unordered_set with basic data type, such as int, float, double, char, + * string etc. 
+ * + * @tparam T the data type of set's element + * @param s std::set's object + */ + template + std::string fmt_unordered_set(const std::unordered_set& s) + { + if (s.empty()) + { + return "{}"; + } + + auto itr = s.begin(); + std::string res("{" + std::to_string(*itr)); + itr++; + for (; itr != s.end(); itr++) + { + res += ", " + std::to_string(*itr); + } + res += "}"; + + return res; + } + /** * @brief Format a matrix to string, Matrices supporting the following parameters: float, double, int8_t, int16_t, int32_t, int64_t. diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 9c31866..3baf545 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -5,7 +5,7 @@ void PrintArr() { PrintSubTitle("print_arr"); - int8_t arr1[] = { 1, -2, 3 }; + int8_t arr1[] = { 65, -2, 98 }; cutl::print_arr(arr1, 3); std::array arr2 = { 1.21, 3.14, 5.22, 6.33 }; @@ -44,6 +44,22 @@ void PrintUnorderedMap() cutl::print_unordered_map(map1, true); } +void PrintSet() +{ + PrintSubTitle("print_set"); + + std::set s = { 5, 2, 8, 1, 9, 3 }; + cutl::print_set(s); +} + +void PrintUnorderedSet() +{ + PrintSubTitle("print_unordered_set"); + + std::unordered_set s = { 5, 2, 8, 1, 9, 3 }; + cutl::print_unordered_set(s); +} + void PrintColor() { PrintSubTitle("print_clr"); @@ -134,6 +150,8 @@ void TestPrint() PrintVec(); PrintMap(); PrintUnorderedMap(); + PrintSet(); + PrintUnorderedSet(); PrintIntMatrix(); PrintFloatMatrix(); } -- Gitee From 23e1ee062d645b0a7afedd5368403bb6dbaad058 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 14 Sep 2025 20:38:42 +0800 Subject: [PATCH 12/40] refactor: modify file encoding from utf-8 to 'utf-8 BOM' --- .vscode/settings.json | 4 ++++ CMakeLists.txt | 2 +- include/common_util/algoutil.h | 2 +- include/common_util/eventloop.h | 2 +- include/common_util/lrucache.h | 2 +- include/common_util/strfmt.h | 2 +- include/common_util/threadpool.h | 2 +- include/common_util/timer.h | 2 +- src/common_util/datetime.cpp | 2 +- 
src/common_util/eventloop.cpp | 2 +- src/common_util/filepath.cpp | 2 +- src/common_util/inner/filesystem.h | 2 +- src/common_util/inner/filesystem_unix.cpp | 2 +- src/common_util/inner/filesystem_win.cpp | 2 +- src/common_util/inner/time_util.h | 2 +- src/common_util/inner/time_util_unix.cpp | 2 +- src/common_util/inner/time_util_win.cpp | 2 +- src/common_util/print.cpp | 2 +- src/common_util/strutil.cpp | 2 +- src/common_util/threadpool.cpp | 2 +- src/common_util/threadutil.cpp | 2 +- src/common_util/timecount.cpp | 2 +- src/common_util/timer.cpp | 2 +- src/common_util/timeutil.cpp | 2 +- src/usage_demo/datetime.hpp | 2 +- src/usage_demo/dlloader.hpp | 2 +- src/usage_demo/eventloop.hpp | 2 +- src/usage_demo/filepath.hpp | 2 +- src/usage_demo/fileutil.hpp | 2 +- src/usage_demo/lrucache.hpp | 2 +- src/usage_demo/print.hpp | 2 +- src/usage_demo/threadpool.hpp | 2 +- src/usage_demo/timecount.hpp | 2 +- src/usage_demo/timer.hpp | 2 +- 34 files changed, 37 insertions(+), 33 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 8111a7d..b93f050 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,6 +5,10 @@ "editor.minimap.enabled": true, // 开启|禁用缩进(Tab)字符字符检测,可选值: true, false "editor.detectIndentation": true, + // 将文件的默认保存格式设置'UTF-8 BOM' + "files.encoding": "utf8bom", + // 在保存新文件时插入 BOM + "files.insertBOM": true, // 将文件(或拓展名)与某种编程语言关联 "files.associations": { "*.in": "c", diff --git a/CMakeLists.txt b/CMakeLists.txt index e33a44c..5039245 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,7 +53,7 @@ target_include_directories(${PROJECT_NAME} $ # 安装后使用安装目录的 include PRIVATE # 仅 当前项目 可见(可以使用) $ - $/src/common_util + $ ) # 如果是windows,设置 管理员权限 和 UAC绕过UI保护 diff --git a/include/common_util/algoutil.h b/include/common_util/algoutil.h index 12d2244..6922358 100644 --- a/include/common_util/algoutil.h +++ b/include/common_util/algoutil.h @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/include/common_util/eventloop.h b/include/common_util/eventloop.h index 6996fb9..9d87b3d 100644 --- a/include/common_util/eventloop.h +++ b/include/common_util/eventloop.h @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/include/common_util/lrucache.h b/include/common_util/lrucache.h index 5115ab5..2a0ceb5 100644 --- a/include/common_util/lrucache.h +++ b/include/common_util/lrucache.h @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/include/common_util/strfmt.h b/include/common_util/strfmt.h index 7b0ef9b..5a733de 100644 --- a/include/common_util/strfmt.h +++ b/include/common_util/strfmt.h @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/include/common_util/threadpool.h b/include/common_util/threadpool.h index 6333314..a6abfc1 100644 --- a/include/common_util/threadpool.h +++ b/include/common_util/threadpool.h @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/include/common_util/timer.h b/include/common_util/timer.h index 64c0301..2466402 100644 --- a/include/common_util/timer.h +++ b/include/common_util/timer.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include diff --git a/src/common_util/datetime.cpp b/src/common_util/datetime.cpp index 83eeff9..268f309 100644 --- a/src/common_util/datetime.cpp +++ b/src/common_util/datetime.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/common_util/eventloop.cpp b/src/common_util/eventloop.cpp index 9ba07aa..9f9221d 100644 --- a/src/common_util/eventloop.cpp +++ b/src/common_util/eventloop.cpp @@ -1,4 +1,4 @@ -#include "eventloop.h" +#include "eventloop.h" #include "inner/logger.h" #include "threadutil.h" diff --git a/src/common_util/filepath.cpp b/src/common_util/filepath.cpp index fe26e25..7ad8aaa 100644 --- a/src/common_util/filepath.cpp +++ b/src/common_util/filepath.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/common_util/inner/filesystem.h b/src/common_util/inner/filesystem.h index 637295e..b3df2cd 100644 --- a/src/common_util/inner/filesystem.h +++ b/src/common_util/inner/filesystem.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include diff --git a/src/common_util/inner/filesystem_unix.cpp b/src/common_util/inner/filesystem_unix.cpp index bf02bb3..dd038e8 100644 --- a/src/common_util/inner/filesystem_unix.cpp +++ b/src/common_util/inner/filesystem_unix.cpp @@ -1,4 +1,4 @@ -#if defined(_WIN32) +#if defined(_WIN32) // do nothing #else diff --git a/src/common_util/inner/filesystem_win.cpp b/src/common_util/inner/filesystem_win.cpp index 0b63260..bba009a 100644 --- a/src/common_util/inner/filesystem_win.cpp +++ b/src/common_util/inner/filesystem_win.cpp @@ -1,4 +1,4 @@ -#if defined(_WIN32) +#if defined(_WIN32) #include "filesystem.h" #include "logger.h" diff --git a/src/common_util/inner/time_util.h b/src/common_util/inner/time_util.h index 16da452..1ada67d 100644 --- a/src/common_util/inner/time_util.h +++ b/src/common_util/inner/time_util.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include diff --git a/src/common_util/inner/time_util_unix.cpp b/src/common_util/inner/time_util_unix.cpp index 61b0820..790b034 100644 --- a/src/common_util/inner/time_util_unix.cpp 
+++ b/src/common_util/inner/time_util_unix.cpp @@ -1,4 +1,4 @@ -#if defined(_WIN32) +#if defined(_WIN32) // do nothing #else diff --git a/src/common_util/inner/time_util_win.cpp b/src/common_util/inner/time_util_win.cpp index 83bf79d..7390c40 100644 --- a/src/common_util/inner/time_util_win.cpp +++ b/src/common_util/inner/time_util_win.cpp @@ -1,4 +1,4 @@ -#if defined(_WIN32) +#if defined(_WIN32) #include #include "system_util.h" diff --git a/src/common_util/print.cpp b/src/common_util/print.cpp index ad5a052..ed19abf 100644 --- a/src/common_util/print.cpp +++ b/src/common_util/print.cpp @@ -1,4 +1,4 @@ -#include "print.h" +#include "print.h" #include #if defined(_WIN32) #include diff --git a/src/common_util/strutil.cpp b/src/common_util/strutil.cpp index f074ec6..356ba68 100644 --- a/src/common_util/strutil.cpp +++ b/src/common_util/strutil.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/common_util/threadpool.cpp b/src/common_util/threadpool.cpp index 0f8877c..2d398f1 100644 --- a/src/common_util/threadpool.cpp +++ b/src/common_util/threadpool.cpp @@ -1,4 +1,4 @@ -#include "threadpool.h" +#include "threadpool.h" #include "algoutil.h" #include "inner/logger.h" #include "threadutil.h" diff --git a/src/common_util/threadutil.cpp b/src/common_util/threadutil.cpp index 89e5119..418481f 100644 --- a/src/common_util/threadutil.cpp +++ b/src/common_util/threadutil.cpp @@ -1,4 +1,4 @@ -#include "threadutil.h" +#include "threadutil.h" #include "inner/logger.h" #include "strutil.h" diff --git a/src/common_util/timecount.cpp b/src/common_util/timecount.cpp index a7dcaed..cf19997 100644 --- a/src/common_util/timecount.cpp +++ b/src/common_util/timecount.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/common_util/timer.cpp b/src/common_util/timer.cpp index 60cb926..08091f8 100644 --- a/src/common_util/timer.cpp +++ b/src/common_util/timer.cpp @@ -1,4 +1,4 @@ -#include "timer.h" +#include "timer.h" #include "inner/logger.h" #include "threadutil.h" diff --git a/src/common_util/timeutil.cpp b/src/common_util/timeutil.cpp index 7cfa0e1..07a18f5 100644 --- a/src/common_util/timeutil.cpp +++ b/src/common_util/timeutil.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/usage_demo/datetime.hpp b/src/usage_demo/datetime.hpp index 57cd358..aa224a7 100644 --- a/src/usage_demo/datetime.hpp +++ b/src/usage_demo/datetime.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "common.hpp" #include "common_util/datetime.h" diff --git a/src/usage_demo/dlloader.hpp b/src/usage_demo/dlloader.hpp index ba78fdb..8066349 100644 --- a/src/usage_demo/dlloader.hpp +++ b/src/usage_demo/dlloader.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "common.hpp" #include "common_util/dlloader.h" diff --git a/src/usage_demo/eventloop.hpp b/src/usage_demo/eventloop.hpp index eff3d84..6744bd7 100644 --- a/src/usage_demo/eventloop.hpp +++ b/src/usage_demo/eventloop.hpp @@ -1,4 +1,4 @@ -#include "common.hpp" +#include "common.hpp" #include "common_util/datetime.h" #include "common_util/eventloop.h" diff --git a/src/usage_demo/filepath.hpp b/src/usage_demo/filepath.hpp index 7934de0..84f82b2 100644 --- a/src/usage_demo/filepath.hpp +++ b/src/usage_demo/filepath.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "common.hpp" #include "common_util/fileutil.h" diff --git a/src/usage_demo/fileutil.hpp b/src/usage_demo/fileutil.hpp index a12b5db..1e12951 100644 --- a/src/usage_demo/fileutil.hpp +++ b/src/usage_demo/fileutil.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once 
#include "common.hpp" #include "common_util/fileutil.h" diff --git a/src/usage_demo/lrucache.hpp b/src/usage_demo/lrucache.hpp index 2dbaed9..c959cbc 100644 --- a/src/usage_demo/lrucache.hpp +++ b/src/usage_demo/lrucache.hpp @@ -1,4 +1,4 @@ -#include "common.hpp" +#include "common.hpp" #include "common_util/lrucache.h" void case_01_02() diff --git a/src/usage_demo/print.hpp b/src/usage_demo/print.hpp index 3baf545..4081091 100644 --- a/src/usage_demo/print.hpp +++ b/src/usage_demo/print.hpp @@ -1,4 +1,4 @@ -#include "common.hpp" +#include "common.hpp" #include "common_util/print.h" void PrintArr() diff --git a/src/usage_demo/threadpool.hpp b/src/usage_demo/threadpool.hpp index b0dbbb6..50b6a64 100644 --- a/src/usage_demo/threadpool.hpp +++ b/src/usage_demo/threadpool.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "common.hpp" #include "common_util/datetime.h" diff --git a/src/usage_demo/timecount.hpp b/src/usage_demo/timecount.hpp index ad577f9..db9dc47 100644 --- a/src/usage_demo/timecount.hpp +++ b/src/usage_demo/timecount.hpp @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "common.hpp" #include "common_util/timecount.h" diff --git a/src/usage_demo/timer.hpp b/src/usage_demo/timer.hpp index bf3856b..2cbc96e 100644 --- a/src/usage_demo/timer.hpp +++ b/src/usage_demo/timer.hpp @@ -1,4 +1,4 @@ -#include "common.hpp" +#include "common.hpp" #include "common_util/threadutil.h" #include "common_util/timer.h" #include -- Gitee From 002f4d4a8f9fce3ada2a68d86556f86eee710f15 Mon Sep 17 00:00:00 2001 From: Spencer Date: Mon, 15 Sep 2025 11:42:31 +0800 Subject: [PATCH 13/40] fix: CMakeLists.txt --- CMakeLists.txt | 8 ++++++-- script/build.bat | 2 +- script/build.sh | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5039245..e08f4ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,9 +106,13 @@ elseif() message("current build is debug") endif() +# 默认不编译usage_demo,cmake命令行参数传递BUILD_DEMO=on时才会编译 
+option(BUILD_DEMO "Need to build usage_demo" OFF) -# add sub moudle for usage_demo -add_subdirectory(src/usage_demo) +if (BUILD_DEMO) + # add sub moudle for usage_demo + add_subdirectory(src/usage_demo) +endif() # # 打包安装 # # pack and install diff --git a/script/build.bat b/script/build.bat index 60d1068..2382134 100644 --- a/script/build.bat +++ b/script/build.bat @@ -98,7 +98,7 @@ if %~1==build ( echo Building... @REM configure - cmake --no-warn-unused-cli -DCMAKE_BUILD_TYPE:STRING=%buildType% -S ./ -B ./build -G %generator% -A %platform% + cmake --no-warn-unused-cli -DCMAKE_BUILD_TYPE:STRING=%buildType% -S ./ -B ./build -G %generator% -A %platform% -DBUILD_DEMO=ON @REM build cmake --build ./build --config %buildType% --target ALL_BUILD -j %cpu_cores% -- ) diff --git a/script/build.sh b/script/build.sh index c1def93..5b6a572 100755 --- a/script/build.sh +++ b/script/build.sh @@ -28,7 +28,7 @@ if [ $1 == "build" ]; then fi echo "Building..." # cmake -B ./build -DCPACK_OUTPUT_FILE_PREFIX=`pwd`/dest - cmake -B ./build -DCMAKE_BUILD_TYPE:STRING=${build_type} + cmake -B ./build -DCMAKE_BUILD_TYPE:STRING=${build_type} -DBUILD_DEMO=ON cd build make echo "Build Done." 
-- Gitee From 4b2508da5196e1d4cbd6475c4b4d7976c15adc4c Mon Sep 17 00:00:00 2001 From: Spencer Date: Fri, 10 Oct 2025 20:36:49 +0800 Subject: [PATCH 14/40] feat: add bitmap.cpp --- src/usage_demo/bitmap.cpp | 485 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 485 insertions(+) create mode 100644 src/usage_demo/bitmap.cpp diff --git a/src/usage_demo/bitmap.cpp b/src/usage_demo/bitmap.cpp new file mode 100644 index 0000000..e6ab31a --- /dev/null +++ b/src/usage_demo/bitmap.cpp @@ -0,0 +1,485 @@ +#include +#include +#include +#include +#include +#include + +class Bitmap +{ +protected: + std::vector bits_; + size_t size_; + +public: + Bitmap(size_t size) + : size_(size) + { + // 计算需要的字节数,向上取整: size/8 + // 注意:这里必须除以8.0,才能获取浮点数然后向上取整 + bits_.resize(std::ceil(size / 8.0), 0); + } + + ~Bitmap() = default; + + /** + * 设置指定位置为 1 + */ + void set(size_t position) + { + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] |= (1 << bitIndex); + } + + /** + * 获取指定位置的值 + */ + bool get(size_t position) const + { + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + return (bits_[byteIndex] & (1 << bitIndex)) != 0; + } + + /** + * 重载 [] 操作符 + */ + bool operator[](size_t position) const { return get(position); } + + /** + * 设置指定位置为 0 + */ + void reset(size_t position) + { + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] &= ~(1 << bitIndex); // 对应的bit设置为0 + } + + /** + * @brief 获取数值为1的位数 + * + * @return size_t + */ 
+ size_t count() + { + size_t count = 0; + for (size_t i = 0; i < bits_.size(); i++) + { + uint8_t byte = bits_[i] & 0xff; + for (size_t j = 0; j < 8; j++) + { + // 方法一 + if ((byte & (1 << j)) != 0) + { + count++; + } + // 方法二 + // if (((byte >>> j) & 0x01) == 1) + // { + // count ++; + // } + } + } + return count; + } + + /** + * 获取 Bitmap 的大小(bit 数) + */ + size_t size() const { return size_; } + + /** + * 判断是否相等 + */ + bool operator==(const Bitmap& other) const + { + if (size_ != other.size_) + return false; + for (size_t i = 0; i < bits_.size(); i++) + { + if (bits_[i] != other.bits_[i]) + return false; + } + return true; + } + + /** + * 判断是否不相等 + */ + bool operator!=(const Bitmap& other) const + { + bool equal = (*this == other); + return !equal; + } + + /** + * 与另一个 Bitmap 进行 AND 操作 + */ + Bitmap operator&(const Bitmap& other) const + { + if (this->size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + Bitmap result(size_); + for (size_t i = 0; i < bits_.size(); i++) + { + result.bits_[i] = bits_[i] & other.bits_[i]; + } + return result; + } + + /** + * 与另一个 Bitmap 进行 OR 操作 + */ + Bitmap operator|(const Bitmap& other) const + { + if (this->size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + Bitmap result(size_); + for (size_t i = 0; i < bits_.size(); i++) + { + result.bits_[i] = bits_[i] | other.bits_[i]; + } + return result; + } +}; + +class DynamicBitmap : public Bitmap +{ +public: + DynamicBitmap(size_t init_size = 64) + : Bitmap(init_size) + { + } + + ~DynamicBitmap() = default; + +public: + /** + * 设置指定位置为 1 + */ + void set(size_t position) + { + ensureCapacity(position + 1); + + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] |= (1 << bitIndex); + } + +private: + /** + * 动态扩容 + */ + void ensureCapacity(size_t minSize) + { + if (minSize <= size_) + return; + + size_ = std::max(size_ * 2, 
minSize); + bits_.resize(size_); + } +}; + +class RoaringBitmap +{ +private: + size_t block_size_{ 0 }; + std::unordered_map container_; + +public: + RoaringBitmap(size_t blockSize) + : block_size_(blockSize) + { + } + + ~RoaringBitmap() = default; + +public: + size_t block_size() const { return block_size_; } + + /** + * 设置指定位置为 1 + */ + void set(size_t position) + { + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + if (!container_.count(key)) + { + // block不存在则添加新的block + container_.emplace(key, Bitmap(block_size_)); + } + container_.at(key).set(bitPosition); + } + + /** + * 获取指定位置的值 + */ + bool get(size_t position) const + { + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + auto itr = container_.find(key); + if (itr != container_.end()) + { + return false; + } + return itr->second.get(bitPosition); + } + + /** + * 重载 [] 操作符 + */ + bool operator[](size_t position) const { return get(position); } + + /** + * 设置指定位置为 0 + */ + void reset(size_t position) + { + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + auto itr = container_.find(key); + if (itr != container_.end()) + { + throw std::out_of_range("Position " + std::to_string(position) + " not in container"); + } + + itr->second.reset(position); + } + + /** + * @brief 获取数值为1的位数 + * + * @return size_t + */ + size_t count() + { + size_t count = 0; + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + count += itr->second.count(); + } + return count; + } + + /** + * 获取 Bitmap 的大小(bit 数) + */ + size_t size() const + { + size_t size = 0; + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + size += itr->second.size(); + } + return size; + } + + /** + * 与另一个 Bitmap 进行 AND 操作 + */ + RoaringBitmap operator&(const RoaringBitmap& other) const + { + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same 
block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + RoaringBitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + Bitmap result = val & other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + } + + return rBitmap; + } + + /** + * 与另一个 Bitmap 进行 OR 操作 + */ + RoaringBitmap operator|(const RoaringBitmap& other) const + { + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + RoaringBitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + Bitmap result = val | other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + else + { + rBitmap.container_.emplace(key, val); + } + } + + for (auto itr = other.container_.begin(); itr != other.container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!rBitmap.container_.count(key)) + { + rBitmap.container_.emplace(key, val); + } + } + + return rBitmap; + } + + /** + * 判断是否相等 + */ + bool operator==(const RoaringBitmap& other) const + { + if (block_size() != other.block_size()) + { + return false; + } + if (container_.size() != other.container_.size()) + { + return false; + } + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!other.container_.count(key)) + { + return false; + } + if (val != other.container_.at(key)) + { + return false; + } + } + return true; + } +}; + +#include + +void test_bitmap() +{ + std::cout << 
"=== 基础 Bitmap 示例 ===" << std::endl; + Bitmap bitmap1(100); + bitmap1.set(10); + bitmap1.set(20); + bitmap1.set(99); + + std::cout << "位置 10: " << bitmap1.get(10) << std::endl; + std::cout << "位置 10: " << bitmap1.get(10) << std::endl; + std::cout << "位置 25: " << bitmap1[25] << std::endl; + std::cout << "元素数量: " << bitmap1.count() << std::endl; + bitmap1.set(99); // 添加重复元素,数量保持不变 + std::cout << "元素数量: " << bitmap1.count() << std::endl; + bitmap1.reset(99); // 重置位置20的只为0,数量减1 + std::cout << "元素数量: " << bitmap1.count() << std::endl; + + // 逻辑位运算 + Bitmap bitmap2(100); + bitmap2.set(20); + bitmap2.set(30); + bitmap2.set(40); + std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; + std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; + + auto andResult = bitmap1 & bitmap2; + std::cout << "bitmap1 & bitmap2 : " << andResult.count() << std::endl; + auto orResult = bitmap1 | bitmap2; + std::cout << "bitmap1 | bitmap2 : " << orResult.count() << std::endl; +} + +void test_dynamic_bitmap() +{ + std::cout << "\n=== 动态 Bitmap 示例 ===" << std::endl; + DynamicBitmap dynamicBitmap; + dynamicBitmap.set(20); + dynamicBitmap.set(30); + std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() + << std::endl; + dynamicBitmap.set(80); // 自动扩容 + std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() + << std::endl; +} + +void test_roaring_bitmap() +{ + std::cout << "\n=== RoaringBitmap 示例 ===" << std::endl; + RoaringBitmap bitmap1(64); + bitmap1.set(10); + bitmap1.set(20); + std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; + bitmap1.set(80); + std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; + + RoaringBitmap bitmap2(64); + bitmap2.set(20); + bitmap2.set(40); + bitmap2.set(130); + std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; 
+ std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; + + RoaringBitmap andBitmap = bitmap1 & bitmap2; + std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() + << std::endl; + RoaringBitmap orBitmap = bitmap1 | bitmap2; + std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() + << std::endl; +} + +void bitmapExamples() +{ + test_bitmap(); + test_dynamic_bitmap(); + test_roaring_bitmap(); +} + +int main() +{ + bitmapExamples(); + return 0; +} \ No newline at end of file -- Gitee From 783b3a005d2f3919fe8f7323870a3b92c7b5b4d9 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 11 Oct 2025 10:08:18 +0800 Subject: [PATCH 15/40] fix: bitmap.cpp --- src/usage_demo/bitmap.cpp | 223 ++++++++++++++++++++++---------------- 1 file changed, 132 insertions(+), 91 deletions(-) diff --git a/src/usage_demo/bitmap.cpp b/src/usage_demo/bitmap.cpp index e6ab31a..c6890d3 100644 --- a/src/usage_demo/bitmap.cpp +++ b/src/usage_demo/bitmap.cpp @@ -1,18 +1,53 @@ #include #include +#include #include #include #include #include -class Bitmap +// ibitmap 统一接口类 +class ibitmap +{ +public: + virtual ~ibitmap() = default; + +public: + // 基本操作 + virtual void set(size_t position) = 0; + virtual bool get(size_t position) const = 0; + virtual void reset(size_t position) = 0; + virtual size_t count() const = 0; + virtual size_t size() const = 0; + + // 操作符重载 + virtual bool operator[](size_t position) const = 0; + // 比较操作 + virtual bool equals(const ibitmap& other) const = 0; + + // // 工具方法 + // virtual void clear() + // { + // for (size_t i = 0; i < size(); ++i) + // { + // if (get(i)) + // { + // reset(i); + // } + // } + // } + + virtual bool empty() const { return count() == 0; } +}; + +class bitmap : public ibitmap { protected: std::vector bits_; size_t size_; public: - Bitmap(size_t size) + bitmap(size_t size) : size_(size) { // 计算需要的字节数,向上取整: size/8 @@ -20,12 +55,11 @@ public: bits_.resize(std::ceil(size / 8.0), 
0); } - ~Bitmap() = default; - +public: /** * 设置指定位置为 1 */ - void set(size_t position) + void set(size_t position) override { if (position >= size_) { @@ -40,7 +74,7 @@ public: /** * 获取指定位置的值 */ - bool get(size_t position) const + bool get(size_t position) const override { if (position >= size_) { @@ -54,12 +88,12 @@ public: /** * 重载 [] 操作符 */ - bool operator[](size_t position) const { return get(position); } + bool operator[](size_t position) const override { return get(position); } /** * 设置指定位置为 0 */ - void reset(size_t position) + void reset(size_t position) override { if (position >= size_) { @@ -75,7 +109,7 @@ public: * * @return size_t */ - size_t count() + size_t count() const override { size_t count = 0; for (size_t i = 0; i < bits_.size(); i++) @@ -99,44 +133,46 @@ public: } /** - * 获取 Bitmap 的大小(bit 数) + * 获取 bitmap 的大小(bit 数) */ - size_t size() const { return size_; } + size_t size() const override { return size_; } - /** - * 判断是否相等 - */ - bool operator==(const Bitmap& other) const + bool equals(const ibitmap& other) const override { - if (size_ != other.size_) + const bitmap* other_bitmap = dynamic_cast(&other); + if (!other_bitmap || size_ != other_bitmap->size_) + { return false; + } + for (size_t i = 0; i < bits_.size(); i++) { - if (bits_[i] != other.bits_[i]) + if (bits_[i] != other_bitmap->bits_[i]) return false; } return true; } + /** + * 判断是否相等 + */ + bool operator==(const bitmap& other) const { return equals(other); } + /** * 判断是否不相等 */ - bool operator!=(const Bitmap& other) const - { - bool equal = (*this == other); - return !equal; - } + bool operator!=(const bitmap& other) const { return !equals(other); } /** - * 与另一个 Bitmap 进行 AND 操作 + * 与另一个 bitmap 进行 AND 操作 */ - Bitmap operator&(const Bitmap& other) const + bitmap operator&(const bitmap& other) const { if (this->size_ != other.size_) { throw std::invalid_argument("Bitmaps must have same size"); } - Bitmap result(size_); + bitmap result(size_); for (size_t i = 0; i < bits_.size(); i++) { 
result.bits_[i] = bits_[i] & other.bits_[i]; @@ -145,15 +181,15 @@ public: } /** - * 与另一个 Bitmap 进行 OR 操作 + * 与另一个 bitmap 进行 OR 操作 */ - Bitmap operator|(const Bitmap& other) const + bitmap operator|(const bitmap& other) const { if (this->size_ != other.size_) { throw std::invalid_argument("Bitmaps must have same size"); } - Bitmap result(size_); + bitmap result(size_); for (size_t i = 0; i < bits_.size(); i++) { result.bits_[i] = bits_[i] | other.bits_[i]; @@ -162,21 +198,21 @@ public: } }; -class DynamicBitmap : public Bitmap +class dynamic_bitmap : public bitmap { public: - DynamicBitmap(size_t init_size = 64) - : Bitmap(init_size) + dynamic_bitmap(size_t init_size = 64) + : bitmap(init_size) { } - ~DynamicBitmap() = default; + ~dynamic_bitmap() = default; public: /** * 设置指定位置为 1 */ - void set(size_t position) + void set(size_t position) override { ensureCapacity(position + 1); @@ -199,19 +235,19 @@ private: } }; -class RoaringBitmap +class roaring_bitmap : public ibitmap { private: size_t block_size_{ 0 }; - std::unordered_map container_; + std::unordered_map container_; public: - RoaringBitmap(size_t blockSize) + roaring_bitmap(size_t blockSize) : block_size_(blockSize) { } - ~RoaringBitmap() = default; + // ~roaring_bitmap() = default; public: size_t block_size() const { return block_size_; } @@ -219,7 +255,7 @@ public: /** * 设置指定位置为 1 */ - void set(size_t position) + void set(size_t position) override { size_t key = position / block_size_; size_t bitPosition = position % block_size_; @@ -227,7 +263,7 @@ public: if (!container_.count(key)) { // block不存在则添加新的block - container_.emplace(key, Bitmap(block_size_)); + container_.emplace(key, bitmap(block_size_)); } container_.at(key).set(bitPosition); } @@ -235,7 +271,7 @@ public: /** * 获取指定位置的值 */ - bool get(size_t position) const + bool get(size_t position) const override { size_t key = position / block_size_; size_t bitPosition = position % block_size_; @@ -251,12 +287,12 @@ public: /** * 重载 [] 操作符 */ - bool 
operator[](size_t position) const { return get(position); } + bool operator[](size_t position) const override { return get(position); } /** * 设置指定位置为 0 */ - void reset(size_t position) + void reset(size_t position) override { size_t key = position / block_size_; size_t bitPosition = position % block_size_; @@ -275,7 +311,7 @@ public: * * @return size_t */ - size_t count() + size_t count() const override { size_t count = 0; for (auto itr = container_.begin(); itr != container_.end(); itr++) @@ -286,9 +322,9 @@ public: } /** - * 获取 Bitmap 的大小(bit 数) + * 获取 bitmap 的大小(bit 数) */ - size_t size() const + size_t size() const override { size_t size = 0; for (auto itr = container_.begin(); itr != container_.end(); itr++) @@ -299,9 +335,43 @@ public: } /** - * 与另一个 Bitmap 进行 AND 操作 + * 判断是否相等 */ - RoaringBitmap operator&(const RoaringBitmap& other) const + // bool operator==(const roaring_bitmap& other) const + bool equals(const ibitmap& other) const override + { + auto otherPtr = dynamic_cast(&other); + if (block_size() != otherPtr->block_size()) + { + return false; + } + if (container_.size() != otherPtr->container_.size()) + { + return false; + } + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!otherPtr->container_.count(key)) + { + return false; + } + if (val != otherPtr->container_.at(key)) + { + return false; + } + } + return true; + } + + bool operator==(const roaring_bitmap& other) const { return equals(other); } + bool operator!=(const roaring_bitmap& other) const { return !equals(other); } + + /** + * 与另一个 bitmap 进行 AND 操作 + */ + roaring_bitmap operator&(const roaring_bitmap& other) const { if (block_size() != other.block_size()) { @@ -311,7 +381,7 @@ public: { throw std::invalid_argument("RoaringBitmap must have same size"); } - RoaringBitmap rBitmap(block_size_); + roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) { auto& key 
= itr->first; @@ -319,7 +389,7 @@ public: if (other.container_.count(key)) { - Bitmap result = val & other.container_.at(key); + bitmap result = val & other.container_.at(key); rBitmap.container_.emplace(key, result); } } @@ -328,9 +398,9 @@ public: } /** - * 与另一个 Bitmap 进行 OR 操作 + * 与另一个 bitmap 进行 OR 操作 */ - RoaringBitmap operator|(const RoaringBitmap& other) const + roaring_bitmap operator|(const roaring_bitmap& other) const { if (block_size() != other.block_size()) { @@ -340,7 +410,7 @@ public: { throw std::invalid_argument("RoaringBitmap must have same size"); } - RoaringBitmap rBitmap(block_size_); + roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) { auto& key = itr->first; @@ -348,7 +418,7 @@ public: if (other.container_.count(key)) { - Bitmap result = val | other.container_.at(key); + bitmap result = val | other.container_.at(key); rBitmap.container_.emplace(key, result); } else @@ -369,43 +439,14 @@ public: return rBitmap; } - - /** - * 判断是否相等 - */ - bool operator==(const RoaringBitmap& other) const - { - if (block_size() != other.block_size()) - { - return false; - } - if (container_.size() != other.container_.size()) - { - return false; - } - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - if (!other.container_.count(key)) - { - return false; - } - if (val != other.container_.at(key)) - { - return false; - } - } - return true; - } }; #include void test_bitmap() { - std::cout << "=== 基础 Bitmap 示例 ===" << std::endl; - Bitmap bitmap1(100); + std::cout << "=== 基础 bitmap 示例 ===" << std::endl; + bitmap bitmap1(100); bitmap1.set(10); bitmap1.set(20); bitmap1.set(99); @@ -420,7 +461,7 @@ void test_bitmap() std::cout << "元素数量: " << bitmap1.count() << std::endl; // 逻辑位运算 - Bitmap bitmap2(100); + bitmap bitmap2(100); bitmap2.set(20); bitmap2.set(30); bitmap2.set(40); @@ -435,8 +476,8 @@ void test_bitmap() void test_dynamic_bitmap() { 
- std::cout << "\n=== 动态 Bitmap 示例 ===" << std::endl; - DynamicBitmap dynamicBitmap; + std::cout << "\n=== 动态 bitmap 示例 ===" << std::endl; + dynamic_bitmap dynamicBitmap; dynamicBitmap.set(20); dynamicBitmap.set(30); std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() @@ -449,24 +490,24 @@ void test_dynamic_bitmap() void test_roaring_bitmap() { std::cout << "\n=== RoaringBitmap 示例 ===" << std::endl; - RoaringBitmap bitmap1(64); + roaring_bitmap bitmap1(64); bitmap1.set(10); bitmap1.set(20); std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; bitmap1.set(80); std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; - RoaringBitmap bitmap2(64); + roaring_bitmap bitmap2(64); bitmap2.set(20); bitmap2.set(40); bitmap2.set(130); std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - RoaringBitmap andBitmap = bitmap1 & bitmap2; + roaring_bitmap andBitmap = bitmap1 & bitmap2; std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() << std::endl; - RoaringBitmap orBitmap = bitmap1 | bitmap2; + roaring_bitmap orBitmap = bitmap1 | bitmap2; std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() << std::endl; } -- Gitee From 1cf8f7a5042e1d5828a0d2b38119eaf628754d79 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 11 Oct 2025 11:33:21 +0800 Subject: [PATCH 16/40] fix: bitmap --- src/usage_demo/bitmap.cpp | 88 +++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/src/usage_demo/bitmap.cpp b/src/usage_demo/bitmap.cpp index c6890d3..6862ca9 100644 --- a/src/usage_demo/bitmap.cpp +++ b/src/usage_demo/bitmap.cpp @@ -168,12 +168,9 @@ public: */ bitmap operator&(const bitmap& other) const { - if (this->size_ != other.size_) - { - throw 
std::invalid_argument("Bitmaps must have same size"); - } - bitmap result(size_); - for (size_t i = 0; i < bits_.size(); i++) + auto minSize = std::min(size_, other.size_); + bitmap result(minSize); + for (size_t i = 0; i < minSize; i++) { result.bits_[i] = bits_[i] & other.bits_[i]; } @@ -185,17 +182,86 @@ public: */ bitmap operator|(const bitmap& other) const { - if (this->size_ != other.size_) + auto minSize = std::min(size_, other.size_); + auto maxSize = std::max(size_, other.size_); + + bitmap result(maxSize); + for (size_t i = 0; i < minSize; i++) + { + result.bits_[i] = bits_[i] | other.bits_[i]; + } + for (size_t i = minSize; i < maxSize; i++) + { + result.bits_[i] = size_ > other.size_ ? bits_[i] : other.bits_[i]; + } + return result; + } + + // 按位取反 + bitmap operator~() const + { + bitmap result(size_); + for (size_t i = 0; i < bits_.size(); i++) + { + // uint8_t类型本身可以进行按位取反操作 + result.bits_[i] = ~bits_[i]; + } + return result; + } + + // 按位异或 + bitmap operator^(const bitmap& other) + { + if (size_ != other.size_) { throw std::invalid_argument("Bitmaps must have same size"); } bitmap result(size_); for (size_t i = 0; i < bits_.size(); i++) { - result.bits_[i] = bits_[i] | other.bits_[i]; + result.bits_[i] = bits_[i] ^ other.bits_[i]; } return result; } + + bitmap& operator&=(const bitmap& other) + { + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + bits_[i] = bits_[i] & other.bits_[i]; + } + return *this; + } + + bitmap& operator|=(const bitmap& other) + { + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + bits_[i] = bits_[i] | other.bits_[i]; + } + return *this; + } + + bitmap& operator^=(const bitmap& other) + { + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + 
bits_[i] = bits_[i] ^ other.bits_[i]; + } + return *this; + } }; class dynamic_bitmap : public bitmap @@ -368,6 +434,12 @@ public: bool operator==(const roaring_bitmap& other) const { return equals(other); } bool operator!=(const roaring_bitmap& other) const { return !equals(other); } + // // 按位取反 + // roaring_bitmap operator~(const roaring_bitmap& other) const + // { + // // TODO + // } + /** * 与另一个 bitmap 进行 AND 操作 */ -- Gitee From 811c189d45e5cf47e8d2a480838427f1df982a19 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 11 Oct 2025 16:11:23 +0800 Subject: [PATCH 17/40] fix: bitmap.cpp --- src/usage_demo/bitmap.cpp | 249 ++++++++++++++++++++++++++++++++++---- 1 file changed, 228 insertions(+), 21 deletions(-) diff --git a/src/usage_demo/bitmap.cpp b/src/usage_demo/bitmap.cpp index 6862ca9..3c3ac6d 100644 --- a/src/usage_demo/bitmap.cpp +++ b/src/usage_demo/bitmap.cpp @@ -1,6 +1,7 @@ -#include +#include +#include #include -#include +// #include #include #include #include @@ -17,27 +18,17 @@ public: virtual void set(size_t position) = 0; virtual bool get(size_t position) const = 0; virtual void reset(size_t position) = 0; + virtual void reset() = 0; virtual size_t count() const = 0; virtual size_t size() const = 0; + virtual std::string to_string() const = 0; + virtual void from_string() = 0; + virtual std::vector valuelist() const = 0; // 操作符重载 virtual bool operator[](size_t position) const = 0; // 比较操作 virtual bool equals(const ibitmap& other) const = 0; - - // // 工具方法 - // virtual void clear() - // { - // for (size_t i = 0; i < size(); ++i) - // { - // if (get(i)) - // { - // reset(i); - // } - // } - // } - - virtual bool empty() const { return count() == 0; } }; class bitmap : public ibitmap @@ -104,6 +95,9 @@ public: bits_[byteIndex] &= ~(1 << bitIndex); // 对应的bit设置为0 } + // 将所有元素重置为0 + void reset() override { std::fill(bits_.begin(), bits_.end(), 0); } + /** * @brief 获取数值为1的位数 * @@ -137,6 +131,40 @@ public: */ size_t size() const override { return size_; } + 
std::string to_string() const + { + // todo + return std::string(); + } + + void from_string() + { + // todo + } + + std::vector valuelist() const + { + std::vector result; + for (size_t i = 0; i < bits_.size(); i++) + { + uint8_t byte = bits_[i]; + if (byte == 0) + { + continue; + } + + size_t base = i << 3; // i * 8 + for (size_t j = 0; j < 8; j++) + { + if ((byte & (1 << j)) != 0) + { + result.emplace_back(base + j); + } + } + } + return result; + } + bool equals(const ibitmap& other) const override { const bitmap* other_bitmap = dynamic_cast(&other); @@ -210,7 +238,7 @@ public: } // 按位异或 - bitmap operator^(const bitmap& other) + bitmap operator^(const bitmap& other) const { if (size_ != other.size_) { @@ -372,6 +400,15 @@ public: itr->second.reset(position); } + // 将所有元素重置为0 + void reset() override + { + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + itr->second.reset(); + } + } + /** * @brief 获取数值为1的位数 * @@ -400,6 +437,35 @@ public: return size; } + std::string to_string() const + { + // todo + return std::string(); + } + + void from_string() + { + // todo + } + + std::vector valuelist() const + { + std::vector keys; + for (const auto& pair : container_) + { + keys.emplace_back(pair.first); + } + std::sort(keys.begin(), keys.end()); + + std::vector result; + for (const auto& key : keys) + { + auto vec = container_.at(key).valuelist(); + result.insert(result.end(), vec.begin(), vec.end()); + } + return result; + } + /** * 判断是否相等 */ @@ -478,10 +544,7 @@ public: { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } + roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) { @@ -511,6 +574,150 @@ public: return rBitmap; } + + /** + * 与另一个 bitmap 进行 Not 操作(按位取反) + */ + roaring_bitmap operator~() const + { + roaring_bitmap rBitmap(block_size_); 
+ for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + rBitmap.container_.emplace(key, ~val); + } + return rBitmap; + } + + /** + * 与另一个 bitmap 进行 异或 操作 + */ + roaring_bitmap operator^(const roaring_bitmap& other) const + { + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + roaring_bitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + bitmap result = val ^ other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + else + { + throw std::invalid_argument("Key " + std::to_string(key) + + " not in other container"); + } + } + + return rBitmap; + } + + // 与 + roaring_bitmap& operator&=(const roaring_bitmap& other) + { + // todo + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val &= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw std::invalid_argument(errMsg); + } + } + + return *this; + } + + // 或 + roaring_bitmap& operator|=(const roaring_bitmap& other) + { + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have 
same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val |= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw std::invalid_argument(errMsg); + } + } + + return *this; + } + + // 异或 + roaring_bitmap& operator^=(const roaring_bitmap& other) + { + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val ^= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw std::invalid_argument(errMsg); + } + } + + return *this; + } }; #include -- Gitee From 13d7104ae0e802fb6373876af795ab0882fcbce3 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 11 Oct 2025 17:00:43 +0800 Subject: [PATCH 18/40] fix: bitmap --- include/common_util/bitmap.h | 237 +++++++++++ src/common_util/bitmap.cpp | 640 ++++++++++++++++++++++++++++ src/usage_demo/bitmap.cpp | 805 ----------------------------------- src/usage_demo/bitmap.hpp | 83 ++++ 4 files changed, 960 insertions(+), 805 deletions(-) create mode 100644 include/common_util/bitmap.h create mode 100644 src/common_util/bitmap.cpp delete mode 100644 src/usage_demo/bitmap.cpp create mode 100644 src/usage_demo/bitmap.hpp diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h new file mode 100644 index 0000000..4bf4b64 --- /dev/null +++ b/include/common_util/bitmap.h @@ -0,0 +1,237 @@ +#pragma once + +// #include +// #include +#include +// #include +#include +#include +#include + 
+namespace cutl +{ + +// ibitmap 统一接口类 +class ibitmap +{ +public: + virtual ~ibitmap() = default; + +public: + // 基本操作 + virtual void set(size_t position) = 0; + virtual bool get(size_t position) const = 0; + virtual void reset(size_t position) = 0; + virtual void reset() = 0; + virtual size_t count() const = 0; + virtual size_t size() const = 0; + virtual std::string to_string() const = 0; + virtual void from_string() = 0; + virtual std::vector valuelist() const = 0; + + // 操作符重载 + virtual bool operator[](size_t position) const = 0; + // 比较操作 + virtual bool equals(const ibitmap& other) const = 0; +}; + +class bitmap : public ibitmap +{ +protected: + std::vector bits_; + size_t size_; + +public: + bitmap(size_t size); + ~bitmap() = default; + +public: + /** + * 设置指定位置为 1 + */ + void set(size_t position) override; + + /** + * 获取指定位置的值 + */ + bool get(size_t position) const override; + + /** + * 重载 [] 操作符 + */ + bool operator[](size_t position) const override { return get(position); } + + /** + * 设置指定位置为 0 + */ + void reset(size_t position) override; + + // 将所有元素重置为0 + void reset() override; + + /** + * @brief 获取数值为1的位数 + * + * @return size_t + */ + size_t count() const override; + + /** + * 获取 bitmap 的大小(bit 数) + */ + size_t size() const override { return size_; } + + std::string to_string() const; + + void from_string(); + + std::vector valuelist() const; + + bool equals(const ibitmap& other) const override; + + /** + * 判断是否相等 + */ + bool operator==(const bitmap& other) const { return equals(other); } + + /** + * 判断是否不相等 + */ + bool operator!=(const bitmap& other) const { return !equals(other); } + + /** + * 与另一个 bitmap 进行 AND 操作 + */ + bitmap operator&(const bitmap& other) const; + + /** + * 与另一个 bitmap 进行 OR 操作 + */ + bitmap operator|(const bitmap& other) const; + + // 按位取反 + bitmap operator~() const; + + // 按位异或 + bitmap operator^(const bitmap& other) const; + + bitmap& operator&=(const bitmap& other); + + bitmap& operator|=(const bitmap& other); + + bitmap& 
operator^=(const bitmap& other); +}; + +class dynamic_bitmap : public bitmap +{ +public: + dynamic_bitmap(size_t init_size = 64); + ~dynamic_bitmap() = default; + +public: + /** + * 设置指定位置为 1 + */ + void set(size_t position) override; + +private: + /** + * 动态扩容 + */ + void ensureCapacity(size_t minSize); +}; + +class roaring_bitmap : public ibitmap +{ +private: + size_t block_size_{ 0 }; + std::unordered_map container_; + +public: + roaring_bitmap(size_t blockSize); + ~roaring_bitmap() = default; + +public: + size_t block_size() const { return block_size_; } + + /** + * 设置指定位置为 1 + */ + void set(size_t position) override; + + /** + * 获取指定位置的值 + */ + bool get(size_t position) const override; + + /** + * 重载 [] 操作符 + */ + bool operator[](size_t position) const override { return get(position); } + + /** + * 设置指定位置为 0 + */ + void reset(size_t position) override; + + // 将所有元素重置为0 + void reset() override; + + /** + * @brief 获取数值为1的位数 + * + * @return size_t + */ + size_t count() const override; + + /** + * 获取 bitmap 的大小(bit 数) + */ + size_t size() const override; + + std::string to_string() const; + + void from_string(); + + std::vector valuelist() const; + + /** + * 判断是否相等 + */ + // bool operator==(const roaring_bitmap& other) const + bool equals(const ibitmap& other) const override; + + bool operator==(const roaring_bitmap& other) const { return equals(other); } + bool operator!=(const roaring_bitmap& other) const { return !equals(other); } + + /** + * 与另一个 bitmap 进行 AND 操作 + */ + roaring_bitmap operator&(const roaring_bitmap& other) const; + + /** + * 与另一个 bitmap 进行 OR 操作 + */ + roaring_bitmap operator|(const roaring_bitmap& other) const; + + /** + * 与另一个 bitmap 进行 Not 操作(按位取反) + */ + roaring_bitmap operator~() const; + + /** + * 与另一个 bitmap 进行 异或 操作 + */ + roaring_bitmap operator^(const roaring_bitmap& other) const; + + // 与 + roaring_bitmap& operator&=(const roaring_bitmap& other); + + // 或 + roaring_bitmap& operator|=(const roaring_bitmap& other); + + // 异或 + 
roaring_bitmap& operator^=(const roaring_bitmap& other); +}; + +} // namespace cutl \ No newline at end of file diff --git a/src/common_util/bitmap.cpp b/src/common_util/bitmap.cpp new file mode 100644 index 0000000..006c4a7 --- /dev/null +++ b/src/common_util/bitmap.cpp @@ -0,0 +1,640 @@ +#include +#include +// #include +// #include +#include +// #include +// #include +// #include +#include "bitmap.h" + +namespace cutl +{ + +bitmap::bitmap(size_t size) + : size_(size) +{ + // 计算需要的字节数,向上取整: size/8 + // 注意:这里必须除以8.0,才能获取浮点数然后向上取整 + bits_.resize(std::ceil(size / 8.0), 0); +} + +/** + * 设置指定位置为 1 + */ +void bitmap::set(size_t position) +{ + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] |= (1 << bitIndex); +} + +/** + * 获取指定位置的值 + */ +bool bitmap::get(size_t position) const +{ + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + return (bits_[byteIndex] & (1 << bitIndex)) != 0; +} + +/** + * 设置指定位置为 0 + */ +void bitmap::reset(size_t position) +{ + if (position >= size_) + { + throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + } + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] &= ~(1 << bitIndex); // 对应的bit设置为0 +} + +// 将所有元素重置为0 +void bitmap::reset() +{ + std::fill(bits_.begin(), bits_.end(), 0); +} + +/** + * @brief 获取数值为1的位数 + * + * @return size_t + */ +size_t bitmap::count() const +{ + size_t count = 0; + for (size_t i = 0; i < bits_.size(); i++) + { + uint8_t byte = bits_[i] & 0xff; + for (size_t j = 0; j < 8; j++) + { + // 方法一 + if ((byte & (1 << j)) != 0) + { + count++; + } 
+ // 方法二 + // if (((byte >>> j) & 0x01) == 1) + // { + // count ++; + // } + } + } + return count; +} + +std::string bitmap::to_string() const +{ + // todo + return std::string(); +} + +void bitmap::from_string() +{ + // todo +} + +std::vector bitmap::valuelist() const +{ + std::vector result; + for (size_t i = 0; i < bits_.size(); i++) + { + uint8_t byte = bits_[i]; + if (byte == 0) + { + continue; + } + + size_t base = i << 3; // i * 8 + for (size_t j = 0; j < 8; j++) + { + if ((byte & (1 << j)) != 0) + { + result.emplace_back(base + j); + } + } + } + return result; +} + +bool bitmap::equals(const ibitmap& other) const +{ + const bitmap* other_bitmap = dynamic_cast(&other); + if (!other_bitmap || size_ != other_bitmap->size_) + { + return false; + } + + for (size_t i = 0; i < bits_.size(); i++) + { + if (bits_[i] != other_bitmap->bits_[i]) + return false; + } + return true; +} + +/** + * 与另一个 bitmap 进行 AND 操作 + */ +bitmap bitmap::operator&(const bitmap& other) const +{ + auto minSize = std::min(size_, other.size_); + bitmap result(minSize); + for (size_t i = 0; i < minSize; i++) + { + result.bits_[i] = bits_[i] & other.bits_[i]; + } + return result; +} + +/** + * 与另一个 bitmap 进行 OR 操作 + */ +bitmap bitmap::operator|(const bitmap& other) const +{ + auto minSize = std::min(size_, other.size_); + auto maxSize = std::max(size_, other.size_); + + bitmap result(maxSize); + for (size_t i = 0; i < minSize; i++) + { + result.bits_[i] = bits_[i] | other.bits_[i]; + } + for (size_t i = minSize; i < maxSize; i++) + { + result.bits_[i] = size_ > other.size_ ? 
bits_[i] : other.bits_[i]; + } + return result; +} + +// 按位取反 +bitmap bitmap::operator~() const +{ + bitmap result(size_); + for (size_t i = 0; i < bits_.size(); i++) + { + // uint8_t类型本身可以进行按位取反操作 + result.bits_[i] = ~bits_[i]; + } + return result; +} + +// 按位异或 +bitmap bitmap::operator^(const bitmap& other) const +{ + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + bitmap result(size_); + for (size_t i = 0; i < bits_.size(); i++) + { + result.bits_[i] = bits_[i] ^ other.bits_[i]; + } + return result; +} + +bitmap& bitmap::operator&=(const bitmap& other) +{ + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + bits_[i] = bits_[i] & other.bits_[i]; + } + return *this; +} + +bitmap& bitmap::operator|=(const bitmap& other) +{ + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + bits_[i] = bits_[i] | other.bits_[i]; + } + return *this; +} + +bitmap& bitmap::operator^=(const bitmap& other) +{ + if (size_ != other.size_) + { + throw std::invalid_argument("Bitmaps must have same size"); + } + for (size_t i = 0; i < bits_.size(); i++) + { + bits_[i] = bits_[i] ^ other.bits_[i]; + } + return *this; +} + +dynamic_bitmap::dynamic_bitmap(size_t init_size) + : bitmap(init_size) +{ +} + +/** + * 设置指定位置为 1 + */ +void dynamic_bitmap::set(size_t position) +{ + ensureCapacity(position + 1); + + size_t byteIndex = position >> 3; // 等价于 position / 8 + size_t bitIndex = position & 0x7; // 等价于 position % 8 + bits_[byteIndex] |= (1 << bitIndex); +} + +/** + * 动态扩容 + */ +void dynamic_bitmap::ensureCapacity(size_t minSize) +{ + if (minSize <= size_) + return; + + size_ = std::max(size_ * 2, minSize); + bits_.resize(size_); +} + +roaring_bitmap::roaring_bitmap(size_t blockSize) + : block_size_(blockSize) +{ +} + +/** + * 设置指定位置为 1 + */ +void 
roaring_bitmap::set(size_t position) +{ + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + if (!container_.count(key)) + { + // block不存在则添加新的block + container_.emplace(key, bitmap(block_size_)); + } + container_.at(key).set(bitPosition); +} + +/** + * 获取指定位置的值 + */ +bool roaring_bitmap::get(size_t position) const +{ + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + auto itr = container_.find(key); + if (itr != container_.end()) + { + return false; + } + return itr->second.get(bitPosition); +} + +/** + * 设置指定位置为 0 + */ +void roaring_bitmap::reset(size_t position) +{ + size_t key = position / block_size_; + size_t bitPosition = position % block_size_; + + auto itr = container_.find(key); + if (itr != container_.end()) + { + throw std::out_of_range("Position " + std::to_string(position) + " not in container"); + } + + itr->second.reset(position); +} + +// 将所有元素重置为0 +void roaring_bitmap::reset() +{ + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + itr->second.reset(); + } +} + +/** + * @brief 获取数值为1的位数 + * + * @return size_t + */ +size_t roaring_bitmap::count() const +{ + size_t count = 0; + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + count += itr->second.count(); + } + return count; +} + +/** + * 获取 bitmap 的大小(bit 数) + */ +size_t roaring_bitmap::size() const +{ + size_t size = 0; + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + size += itr->second.size(); + } + return size; +} + +std::string roaring_bitmap::to_string() const +{ + // todo + return std::string(); +} + +void roaring_bitmap::from_string() +{ + // todo +} + +std::vector roaring_bitmap::valuelist() const +{ + std::vector keys; + for (const auto& pair : container_) + { + keys.emplace_back(pair.first); + } + std::sort(keys.begin(), keys.end()); + + std::vector result; + for (const auto& key : keys) + { + auto vec = container_.at(key).valuelist(); + 
result.insert(result.end(), vec.begin(), vec.end()); + } + return result; +} + +/** + * 判断是否相等 + */ +bool roaring_bitmap::equals(const ibitmap& other) const +{ + auto otherPtr = dynamic_cast(&other); + if (block_size() != otherPtr->block_size()) + { + return false; + } + if (container_.size() != otherPtr->container_.size()) + { + return false; + } + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!otherPtr->container_.count(key)) + { + return false; + } + if (val != otherPtr->container_.at(key)) + { + return false; + } + } + return true; +} + +/** + * 与另一个 bitmap 进行 AND 操作 + */ +roaring_bitmap roaring_bitmap::operator&(const roaring_bitmap& other) const +{ + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + roaring_bitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + bitmap result = val & other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + } + + return rBitmap; +} + +/** + * 与另一个 bitmap 进行 OR 操作 + */ +roaring_bitmap roaring_bitmap::operator|(const roaring_bitmap& other) const +{ + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + + roaring_bitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + bitmap result = val | other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + else + { + rBitmap.container_.emplace(key, val); + } + } + + for (auto itr = other.container_.begin(); itr != 
other.container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!rBitmap.container_.count(key)) + { + rBitmap.container_.emplace(key, val); + } + } + + return rBitmap; +} + +/** + * 与另一个 bitmap 进行 Not 操作(按位取反) + */ +roaring_bitmap roaring_bitmap::operator~() const +{ + roaring_bitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + rBitmap.container_.emplace(key, ~val); + } + return rBitmap; +} + +/** + * 与另一个 bitmap 进行 异或 操作 + */ +roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const +{ + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + roaring_bitmap rBitmap(block_size_); + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + bitmap result = val ^ other.container_.at(key); + rBitmap.container_.emplace(key, result); + } + else + { + throw std::invalid_argument("Key " + std::to_string(key) + " not in other container"); + } + } + + return rBitmap; +} + +// 与 +roaring_bitmap& roaring_bitmap::operator&=(const roaring_bitmap& other) +{ + // todo + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val &= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw 
std::invalid_argument(errMsg); + } + } + + return *this; +} + +// 或 +roaring_bitmap& roaring_bitmap::operator|=(const roaring_bitmap& other) +{ + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val |= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw std::invalid_argument(errMsg); + } + } + + return *this; +} + +// 异或 +roaring_bitmap& roaring_bitmap::operator^=(const roaring_bitmap& other) +{ + if (block_size() != other.block_size()) + { + throw std::invalid_argument("RoaringBitmap must have same block_size"); + } + if (container_.size() != other.container_.size()) + { + throw std::invalid_argument("RoaringBitmap must have same size"); + } + + for (auto itr = container_.begin(); itr != container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + + if (other.container_.count(key)) + { + val ^= other.container_.at(key); + } + else + { + std::string errMsg = "Key " + std::to_string(key) + " not in other container."; + throw std::invalid_argument(errMsg); + } + } + + return *this; +} + +} // namespace cutl \ No newline at end of file diff --git a/src/usage_demo/bitmap.cpp b/src/usage_demo/bitmap.cpp deleted file mode 100644 index 3c3ac6d..0000000 --- a/src/usage_demo/bitmap.cpp +++ /dev/null @@ -1,805 +0,0 @@ -#include -#include -#include -// #include -#include -#include -#include -#include - -// ibitmap 统一接口类 -class ibitmap -{ -public: - virtual ~ibitmap() = default; - -public: - // 基本操作 - virtual void set(size_t position) = 0; - virtual bool get(size_t position) const = 0; - virtual void 
reset(size_t position) = 0; - virtual void reset() = 0; - virtual size_t count() const = 0; - virtual size_t size() const = 0; - virtual std::string to_string() const = 0; - virtual void from_string() = 0; - virtual std::vector valuelist() const = 0; - - // 操作符重载 - virtual bool operator[](size_t position) const = 0; - // 比较操作 - virtual bool equals(const ibitmap& other) const = 0; -}; - -class bitmap : public ibitmap -{ -protected: - std::vector bits_; - size_t size_; - -public: - bitmap(size_t size) - : size_(size) - { - // 计算需要的字节数,向上取整: size/8 - // 注意:这里必须除以8.0,才能获取浮点数然后向上取整 - bits_.resize(std::ceil(size / 8.0), 0); - } - -public: - /** - * 设置指定位置为 1 - */ - void set(size_t position) override - { - if (position >= size_) - { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); - } - - size_t byteIndex = position >> 3; // 等价于 position / 8 - size_t bitIndex = position & 0x7; // 等价于 position % 8 - bits_[byteIndex] |= (1 << bitIndex); - } - - /** - * 获取指定位置的值 - */ - bool get(size_t position) const override - { - if (position >= size_) - { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); - } - size_t byteIndex = position >> 3; // 等价于 position / 8 - size_t bitIndex = position & 0x7; // 等价于 position % 8 - return (bits_[byteIndex] & (1 << bitIndex)) != 0; - } - - /** - * 重载 [] 操作符 - */ - bool operator[](size_t position) const override { return get(position); } - - /** - * 设置指定位置为 0 - */ - void reset(size_t position) override - { - if (position >= size_) - { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); - } - size_t byteIndex = position >> 3; // 等价于 position / 8 - size_t bitIndex = position & 0x7; // 等价于 position % 8 - bits_[byteIndex] &= ~(1 << bitIndex); // 对应的bit设置为0 - } - - // 将所有元素重置为0 - void reset() override { std::fill(bits_.begin(), bits_.end(), 0); } - - /** - * @brief 获取数值为1的位数 - * - * @return size_t - */ - size_t count() const override - { - size_t count = 
0; - for (size_t i = 0; i < bits_.size(); i++) - { - uint8_t byte = bits_[i] & 0xff; - for (size_t j = 0; j < 8; j++) - { - // 方法一 - if ((byte & (1 << j)) != 0) - { - count++; - } - // 方法二 - // if (((byte >>> j) & 0x01) == 1) - // { - // count ++; - // } - } - } - return count; - } - - /** - * 获取 bitmap 的大小(bit 数) - */ - size_t size() const override { return size_; } - - std::string to_string() const - { - // todo - return std::string(); - } - - void from_string() - { - // todo - } - - std::vector valuelist() const - { - std::vector result; - for (size_t i = 0; i < bits_.size(); i++) - { - uint8_t byte = bits_[i]; - if (byte == 0) - { - continue; - } - - size_t base = i << 3; // i * 8 - for (size_t j = 0; j < 8; j++) - { - if ((byte & (1 << j)) != 0) - { - result.emplace_back(base + j); - } - } - } - return result; - } - - bool equals(const ibitmap& other) const override - { - const bitmap* other_bitmap = dynamic_cast(&other); - if (!other_bitmap || size_ != other_bitmap->size_) - { - return false; - } - - for (size_t i = 0; i < bits_.size(); i++) - { - if (bits_[i] != other_bitmap->bits_[i]) - return false; - } - return true; - } - - /** - * 判断是否相等 - */ - bool operator==(const bitmap& other) const { return equals(other); } - - /** - * 判断是否不相等 - */ - bool operator!=(const bitmap& other) const { return !equals(other); } - - /** - * 与另一个 bitmap 进行 AND 操作 - */ - bitmap operator&(const bitmap& other) const - { - auto minSize = std::min(size_, other.size_); - bitmap result(minSize); - for (size_t i = 0; i < minSize; i++) - { - result.bits_[i] = bits_[i] & other.bits_[i]; - } - return result; - } - - /** - * 与另一个 bitmap 进行 OR 操作 - */ - bitmap operator|(const bitmap& other) const - { - auto minSize = std::min(size_, other.size_); - auto maxSize = std::max(size_, other.size_); - - bitmap result(maxSize); - for (size_t i = 0; i < minSize; i++) - { - result.bits_[i] = bits_[i] | other.bits_[i]; - } - for (size_t i = minSize; i < maxSize; i++) - { - result.bits_[i] = size_ > 
other.size_ ? bits_[i] : other.bits_[i]; - } - return result; - } - - // 按位取反 - bitmap operator~() const - { - bitmap result(size_); - for (size_t i = 0; i < bits_.size(); i++) - { - // uint8_t类型本身可以进行按位取反操作 - result.bits_[i] = ~bits_[i]; - } - return result; - } - - // 按位异或 - bitmap operator^(const bitmap& other) const - { - if (size_ != other.size_) - { - throw std::invalid_argument("Bitmaps must have same size"); - } - bitmap result(size_); - for (size_t i = 0; i < bits_.size(); i++) - { - result.bits_[i] = bits_[i] ^ other.bits_[i]; - } - return result; - } - - bitmap& operator&=(const bitmap& other) - { - if (size_ != other.size_) - { - throw std::invalid_argument("Bitmaps must have same size"); - } - for (size_t i = 0; i < bits_.size(); i++) - { - bits_[i] = bits_[i] & other.bits_[i]; - } - return *this; - } - - bitmap& operator|=(const bitmap& other) - { - if (size_ != other.size_) - { - throw std::invalid_argument("Bitmaps must have same size"); - } - for (size_t i = 0; i < bits_.size(); i++) - { - bits_[i] = bits_[i] | other.bits_[i]; - } - return *this; - } - - bitmap& operator^=(const bitmap& other) - { - if (size_ != other.size_) - { - throw std::invalid_argument("Bitmaps must have same size"); - } - for (size_t i = 0; i < bits_.size(); i++) - { - bits_[i] = bits_[i] ^ other.bits_[i]; - } - return *this; - } -}; - -class dynamic_bitmap : public bitmap -{ -public: - dynamic_bitmap(size_t init_size = 64) - : bitmap(init_size) - { - } - - ~dynamic_bitmap() = default; - -public: - /** - * 设置指定位置为 1 - */ - void set(size_t position) override - { - ensureCapacity(position + 1); - - size_t byteIndex = position >> 3; // 等价于 position / 8 - size_t bitIndex = position & 0x7; // 等价于 position % 8 - bits_[byteIndex] |= (1 << bitIndex); - } - -private: - /** - * 动态扩容 - */ - void ensureCapacity(size_t minSize) - { - if (minSize <= size_) - return; - - size_ = std::max(size_ * 2, minSize); - bits_.resize(size_); - } -}; - -class roaring_bitmap : public ibitmap -{ 
-private: - size_t block_size_{ 0 }; - std::unordered_map container_; - -public: - roaring_bitmap(size_t blockSize) - : block_size_(blockSize) - { - } - - // ~roaring_bitmap() = default; - -public: - size_t block_size() const { return block_size_; } - - /** - * 设置指定位置为 1 - */ - void set(size_t position) override - { - size_t key = position / block_size_; - size_t bitPosition = position % block_size_; - - if (!container_.count(key)) - { - // block不存在则添加新的block - container_.emplace(key, bitmap(block_size_)); - } - container_.at(key).set(bitPosition); - } - - /** - * 获取指定位置的值 - */ - bool get(size_t position) const override - { - size_t key = position / block_size_; - size_t bitPosition = position % block_size_; - - auto itr = container_.find(key); - if (itr != container_.end()) - { - return false; - } - return itr->second.get(bitPosition); - } - - /** - * 重载 [] 操作符 - */ - bool operator[](size_t position) const override { return get(position); } - - /** - * 设置指定位置为 0 - */ - void reset(size_t position) override - { - size_t key = position / block_size_; - size_t bitPosition = position % block_size_; - - auto itr = container_.find(key); - if (itr != container_.end()) - { - throw std::out_of_range("Position " + std::to_string(position) + " not in container"); - } - - itr->second.reset(position); - } - - // 将所有元素重置为0 - void reset() override - { - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - itr->second.reset(); - } - } - - /** - * @brief 获取数值为1的位数 - * - * @return size_t - */ - size_t count() const override - { - size_t count = 0; - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - count += itr->second.count(); - } - return count; - } - - /** - * 获取 bitmap 的大小(bit 数) - */ - size_t size() const override - { - size_t size = 0; - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - size += itr->second.size(); - } - return size; - } - - std::string to_string() const - { - // todo - return std::string(); - 
} - - void from_string() - { - // todo - } - - std::vector valuelist() const - { - std::vector keys; - for (const auto& pair : container_) - { - keys.emplace_back(pair.first); - } - std::sort(keys.begin(), keys.end()); - - std::vector result; - for (const auto& key : keys) - { - auto vec = container_.at(key).valuelist(); - result.insert(result.end(), vec.begin(), vec.end()); - } - return result; - } - - /** - * 判断是否相等 - */ - // bool operator==(const roaring_bitmap& other) const - bool equals(const ibitmap& other) const override - { - auto otherPtr = dynamic_cast(&other); - if (block_size() != otherPtr->block_size()) - { - return false; - } - if (container_.size() != otherPtr->container_.size()) - { - return false; - } - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - if (!otherPtr->container_.count(key)) - { - return false; - } - if (val != otherPtr->container_.at(key)) - { - return false; - } - } - return true; - } - - bool operator==(const roaring_bitmap& other) const { return equals(other); } - bool operator!=(const roaring_bitmap& other) const { return !equals(other); } - - // // 按位取反 - // roaring_bitmap operator~(const roaring_bitmap& other) const - // { - // // TODO - // } - - /** - * 与另一个 bitmap 进行 AND 操作 - */ - roaring_bitmap operator&(const roaring_bitmap& other) const - { - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } - roaring_bitmap rBitmap(block_size_); - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - bitmap result = val & other.container_.at(key); - rBitmap.container_.emplace(key, result); - } - } - - return rBitmap; - } - - /** - * 与另一个 bitmap 进行 OR 操作 
- */ - roaring_bitmap operator|(const roaring_bitmap& other) const - { - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - - roaring_bitmap rBitmap(block_size_); - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - bitmap result = val | other.container_.at(key); - rBitmap.container_.emplace(key, result); - } - else - { - rBitmap.container_.emplace(key, val); - } - } - - for (auto itr = other.container_.begin(); itr != other.container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - if (!rBitmap.container_.count(key)) - { - rBitmap.container_.emplace(key, val); - } - } - - return rBitmap; - } - - /** - * 与另一个 bitmap 进行 Not 操作(按位取反) - */ - roaring_bitmap operator~() const - { - roaring_bitmap rBitmap(block_size_); - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - rBitmap.container_.emplace(key, ~val); - } - return rBitmap; - } - - /** - * 与另一个 bitmap 进行 异或 操作 - */ - roaring_bitmap operator^(const roaring_bitmap& other) const - { - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } - roaring_bitmap rBitmap(block_size_); - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - bitmap result = val ^ other.container_.at(key); - rBitmap.container_.emplace(key, result); - } - else - { - throw std::invalid_argument("Key " + std::to_string(key) + - " not in other container"); - } - } - - return rBitmap; - } - - // 与 - roaring_bitmap& operator&=(const roaring_bitmap& other) - { 
- // todo - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } - - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - val &= other.container_.at(key); - } - else - { - std::string errMsg = "Key " + std::to_string(key) + " not in other container."; - throw std::invalid_argument(errMsg); - } - } - - return *this; - } - - // 或 - roaring_bitmap& operator|=(const roaring_bitmap& other) - { - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } - - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - val |= other.container_.at(key); - } - else - { - std::string errMsg = "Key " + std::to_string(key) + " not in other container."; - throw std::invalid_argument(errMsg); - } - } - - return *this; - } - - // 异或 - roaring_bitmap& operator^=(const roaring_bitmap& other) - { - if (block_size() != other.block_size()) - { - throw std::invalid_argument("RoaringBitmap must have same block_size"); - } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } - - for (auto itr = container_.begin(); itr != container_.end(); itr++) - { - auto& key = itr->first; - auto& val = itr->second; - - if (other.container_.count(key)) - { - val ^= other.container_.at(key); - } - else - { - std::string errMsg = "Key " + std::to_string(key) + " not in other container."; - throw std::invalid_argument(errMsg); - } - } 
- - return *this; - } -}; - -#include - -void test_bitmap() -{ - std::cout << "=== 基础 bitmap 示例 ===" << std::endl; - bitmap bitmap1(100); - bitmap1.set(10); - bitmap1.set(20); - bitmap1.set(99); - - std::cout << "位置 10: " << bitmap1.get(10) << std::endl; - std::cout << "位置 10: " << bitmap1.get(10) << std::endl; - std::cout << "位置 25: " << bitmap1[25] << std::endl; - std::cout << "元素数量: " << bitmap1.count() << std::endl; - bitmap1.set(99); // 添加重复元素,数量保持不变 - std::cout << "元素数量: " << bitmap1.count() << std::endl; - bitmap1.reset(99); // 重置位置20的只为0,数量减1 - std::cout << "元素数量: " << bitmap1.count() << std::endl; - - // 逻辑位运算 - bitmap bitmap2(100); - bitmap2.set(20); - bitmap2.set(30); - bitmap2.set(40); - std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; - std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - - auto andResult = bitmap1 & bitmap2; - std::cout << "bitmap1 & bitmap2 : " << andResult.count() << std::endl; - auto orResult = bitmap1 | bitmap2; - std::cout << "bitmap1 | bitmap2 : " << orResult.count() << std::endl; -} - -void test_dynamic_bitmap() -{ - std::cout << "\n=== 动态 bitmap 示例 ===" << std::endl; - dynamic_bitmap dynamicBitmap; - dynamicBitmap.set(20); - dynamicBitmap.set(30); - std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() - << std::endl; - dynamicBitmap.set(80); // 自动扩容 - std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() - << std::endl; -} - -void test_roaring_bitmap() -{ - std::cout << "\n=== RoaringBitmap 示例 ===" << std::endl; - roaring_bitmap bitmap1(64); - bitmap1.set(10); - bitmap1.set(20); - std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; - bitmap1.set(80); - std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; - - roaring_bitmap bitmap2(64); - bitmap2.set(20); - bitmap2.set(40); - bitmap2.set(130); - std::cout << 
"bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; - std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - - roaring_bitmap andBitmap = bitmap1 & bitmap2; - std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() - << std::endl; - roaring_bitmap orBitmap = bitmap1 | bitmap2; - std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() - << std::endl; -} - -void bitmapExamples() -{ - test_bitmap(); - test_dynamic_bitmap(); - test_roaring_bitmap(); -} - -int main() -{ - bitmapExamples(); - return 0; -} \ No newline at end of file diff --git a/src/usage_demo/bitmap.hpp b/src/usage_demo/bitmap.hpp new file mode 100644 index 0000000..5f126a7 --- /dev/null +++ b/src/usage_demo/bitmap.hpp @@ -0,0 +1,83 @@ +#include "common_util/bitmap.h" + +void test_bitmap() +{ + std::cout << "=== 基础 bitmap 示例 ===" << std::endl; + bitmap bitmap1(100); + bitmap1.set(10); + bitmap1.set(20); + bitmap1.set(99); + + std::cout << "位置 10: " << bitmap1.get(10) << std::endl; + std::cout << "位置 10: " << bitmap1.get(10) << std::endl; + std::cout << "位置 25: " << bitmap1[25] << std::endl; + std::cout << "元素数量: " << bitmap1.count() << std::endl; + bitmap1.set(99); // 添加重复元素,数量保持不变 + std::cout << "元素数量: " << bitmap1.count() << std::endl; + bitmap1.reset(99); // 重置位置20的只为0,数量减1 + std::cout << "元素数量: " << bitmap1.count() << std::endl; + + // 逻辑位运算 + bitmap bitmap2(100); + bitmap2.set(20); + bitmap2.set(30); + bitmap2.set(40); + std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; + std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; + + auto andResult = bitmap1 & bitmap2; + std::cout << "bitmap1 & bitmap2 : " << andResult.count() << std::endl; + auto orResult = bitmap1 | bitmap2; + std::cout << "bitmap1 | bitmap2 : " << orResult.count() << std::endl; +} + +void test_dynamic_bitmap() +{ + std::cout << "\n=== 动态 bitmap 示例 ===" << 
std::endl; + dynamic_bitmap dynamicBitmap; + dynamicBitmap.set(20); + dynamicBitmap.set(30); + std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() + << std::endl; + dynamicBitmap.set(80); // 自动扩容 + std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() + << std::endl; +} + +void test_roaring_bitmap() +{ + std::cout << "\n=== RoaringBitmap 示例 ===" << std::endl; + roaring_bitmap bitmap1(64); + bitmap1.set(10); + bitmap1.set(20); + std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; + bitmap1.set(80); + std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; + + roaring_bitmap bitmap2(64); + bitmap2.set(20); + bitmap2.set(40); + bitmap2.set(130); + std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; + std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; + + roaring_bitmap andBitmap = bitmap1 & bitmap2; + std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() + << std::endl; + roaring_bitmap orBitmap = bitmap1 | bitmap2; + std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() + << std::endl; +} + +void bitmapExamples() +{ + test_bitmap(); + test_dynamic_bitmap(); + test_roaring_bitmap(); +} + +int main() +{ + bitmapExamples(); + return 0; +} \ No newline at end of file -- Gitee From 27f04112c1587fa4200bbd71d069baa798f1cc6c Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 11 Oct 2025 21:24:45 +0800 Subject: [PATCH 19/40] fix: bitmap --- include/common_util/bitmap.h | 13 +++-- src/common_util/bitmap.cpp | 100 +++++++++++++++++++++++++++++------ src/common_util/strfmt.cpp | 9 +++- src/usage_demo/bitmap.hpp | 36 ++++++------- 4 files changed, 121 insertions(+), 37 deletions(-) diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h index 4bf4b64..3fb02b7 100644 --- 
a/include/common_util/bitmap.h +++ b/include/common_util/bitmap.h @@ -26,7 +26,7 @@ public: virtual size_t count() const = 0; virtual size_t size() const = 0; virtual std::string to_string() const = 0; - virtual void from_string() = 0; + virtual void from_string(const std::string text) = 0; virtual std::vector valuelist() const = 0; // 操作符重载 @@ -83,7 +83,7 @@ public: std::string to_string() const; - void from_string(); + void from_string(const std::string text); std::vector valuelist() const; @@ -120,6 +120,13 @@ public: bitmap& operator|=(const bitmap& other); bitmap& operator^=(const bitmap& other); + +private: + /** + * 转换成 十六进制的字符串 + * @param compress 0: 不压缩,1: 压缩 + */ + std::string to_hex(int compress = 1) const; }; class dynamic_bitmap : public bitmap @@ -191,7 +198,7 @@ public: std::string to_string() const; - void from_string(); + void from_string(const std::string text); std::vector valuelist() const; diff --git a/src/common_util/bitmap.cpp b/src/common_util/bitmap.cpp index 006c4a7..c35024c 100644 --- a/src/common_util/bitmap.cpp +++ b/src/common_util/bitmap.cpp @@ -1,12 +1,9 @@ -#include +#include "bitmap.h" +#include "inner/logger.h" +#include "strfmt.h" +#include #include -// #include -// #include #include -// #include -// #include -// #include -#include "bitmap.h" namespace cutl { @@ -96,15 +93,63 @@ size_t bitmap::count() const return count; } +std::string bitmap::to_hex(int compress) const +{ + if (compress == 0) + { + return cutl::to_hex(bits_.data(), size_, true, 0); + } + else + { + // 找到最后一个非零字节的索引 + int lastNonZeroIndex = -1; + for (size_t i = bits_.size() - 1; i >= 0; i--) + { + if (bits_[i] != 0) + { + lastNonZeroIndex = i; + break; + } + } + + // 如果所有字节都是零,返回空字符串 + if (lastNonZeroIndex == -1) + { + return ""; + } + + // 只序列化到最后一个非零字节 + return cutl::to_hex(bits_.data(), lastNonZeroIndex + 1, true, 0); + } +} + std::string bitmap::to_string() const { - // todo - return std::string(); + return this->to_hex(1); } -void bitmap::from_string() 
+void bitmap::from_string(const std::string text) { - // todo + for (char c : text) + { + if (!isxdigit(c)) + { + throw std::runtime_error("Invalid hexadecimal string"); + } + } + + // 先将bitmap所有数据重置成0 + this->reset(); + + // 每两个字符表示一个字节 + const size_t expectedLength = bits_.size() * 2; + size_t strLen = std::min(expectedLength, text.length()); + // 将十六进制字符串转换为字节数组 + for (size_t i = 0; i < strLen; i++) + { + std::string hexByte = text.substr(i * 2, 2); + bits_[i] = std::stoi(hexByte, nullptr, 16); + } } std::vector bitmap::valuelist() const @@ -160,23 +205,31 @@ bitmap bitmap::operator&(const bitmap& other) const return result; } +// #include + /** * 与另一个 bitmap 进行 OR 操作 */ bitmap bitmap::operator|(const bitmap& other) const { + CUTL_INFO("00"); auto minSize = std::min(size_, other.size_); auto maxSize = std::max(size_, other.size_); + CUTL_INFO("11"); + // std::cout << "minSize: " << minSize << ", maxSize: " << maxSize << std::endl; + bitmap result(maxSize); for (size_t i = 0; i < minSize; i++) { result.bits_[i] = bits_[i] | other.bits_[i]; } + CUTL_INFO("22"); for (size_t i = minSize; i < maxSize; i++) { result.bits_[i] = size_ > other.size_ ? 
bits_[i] : other.bits_[i]; } + CUTL_INFO("33"); return result; } @@ -368,11 +421,29 @@ size_t roaring_bitmap::size() const std::string roaring_bitmap::to_string() const { - // todo - return std::string(); + std::vector keys; + for (const auto& pair : container_) + { + keys.emplace_back(pair.first); + } + std::sort(keys.begin(), keys.end()); + + std::string result("{"); + for (size_t i = 0; i < keys.size(); i++) + { + auto& key = keys[i]; + auto& val = container_.at(key); + result += "\"" + std::to_string(key) + "\":\"" + val.to_string() + "\""; + if (i != keys.size() - 1) + { + result += ","; + } + } + result += "}"; + return result; } -void roaring_bitmap::from_string() +void roaring_bitmap::from_string(const std::string text) { // todo } @@ -546,7 +617,6 @@ roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const // 与 roaring_bitmap& roaring_bitmap::operator&=(const roaring_bitmap& other) { - // todo if (block_size() != other.block_size()) { throw std::invalid_argument("RoaringBitmap must have same block_size"); diff --git a/src/common_util/strfmt.cpp b/src/common_util/strfmt.cpp index 329bb96..75507e7 100644 --- a/src/common_util/strfmt.cpp +++ b/src/common_util/strfmt.cpp @@ -206,7 +206,14 @@ namespace cutl const char temp = data[i]; output.push_back(hex_chars[temp / 16]); output.push_back(hex_chars[temp % 16]); - output.push_back(separator); + if (!separator) + { + output.push_back(separator); + } + else + { + // separator为0或nullptr时,不添加分隔符 + } } return output; diff --git a/src/usage_demo/bitmap.hpp b/src/usage_demo/bitmap.hpp index 5f126a7..4211344 100644 --- a/src/usage_demo/bitmap.hpp +++ b/src/usage_demo/bitmap.hpp @@ -1,9 +1,11 @@ -#include "common_util/bitmap.h" +#include "common.hpp" +#include "common_util/bitmap.h" void test_bitmap() { - std::cout << "=== 基础 bitmap 示例 ===" << std::endl; - bitmap bitmap1(100); + PrintSubTitle("bitmap"); + + cutl::bitmap bitmap1(100); bitmap1.set(10); bitmap1.set(20); bitmap1.set(99); @@ -18,7 +20,7 @@ 
void test_bitmap() std::cout << "元素数量: " << bitmap1.count() << std::endl; // 逻辑位运算 - bitmap bitmap2(100); + cutl::bitmap bitmap2(100); bitmap2.set(20); bitmap2.set(30); bitmap2.set(40); @@ -33,8 +35,9 @@ void test_bitmap() void test_dynamic_bitmap() { - std::cout << "\n=== 动态 bitmap 示例 ===" << std::endl; - dynamic_bitmap dynamicBitmap; + PrintSubTitle("cutl::dynamic_bitmap"); + + cutl::dynamic_bitmap dynamicBitmap; dynamicBitmap.set(20); dynamicBitmap.set(30); std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() @@ -46,38 +49,35 @@ void test_dynamic_bitmap() void test_roaring_bitmap() { - std::cout << "\n=== RoaringBitmap 示例 ===" << std::endl; - roaring_bitmap bitmap1(64); + PrintSubTitle("roaring_bitmap"); + + cutl::roaring_bitmap bitmap1(64); bitmap1.set(10); bitmap1.set(20); std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; bitmap1.set(80); std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; - roaring_bitmap bitmap2(64); + cutl::roaring_bitmap bitmap2(64); bitmap2.set(20); bitmap2.set(40); bitmap2.set(130); std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - roaring_bitmap andBitmap = bitmap1 & bitmap2; + cutl::roaring_bitmap andBitmap = bitmap1 & bitmap2; std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() << std::endl; - roaring_bitmap orBitmap = bitmap1 | bitmap2; + cutl::roaring_bitmap orBitmap = bitmap1 | bitmap2; std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() << std::endl; } -void bitmapExamples() +void BitmapTest() { + PrintTitle("BitmapTest"); + test_bitmap(); test_dynamic_bitmap(); test_roaring_bitmap(); } - -int main() -{ - bitmapExamples(); - return 0; -} \ No newline at end of file -- Gitee From c566617fd321a55083a4fe2b89180dedd21cd2cc Mon 
Sep 17 00:00:00 2001 From: Spencer Date: Sun, 12 Oct 2025 16:34:27 +0800 Subject: [PATCH 20/40] fix: bitmap --- include/common_util/bitmap.h | 6 +- src/common_util/bitmap.cpp | 118 ++++++++++++++++++----------- src/common_util/strfmt.cpp | 2 +- src/usage_demo/bitmap.hpp | 139 +++++++++++++++++++++++------------ 4 files changed, 173 insertions(+), 92 deletions(-) diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h index 3fb02b7..21dbae9 100644 --- a/include/common_util/bitmap.h +++ b/include/common_util/bitmap.h @@ -26,7 +26,7 @@ public: virtual size_t count() const = 0; virtual size_t size() const = 0; virtual std::string to_string() const = 0; - virtual void from_string(const std::string text) = 0; + virtual void from_string(const std::string& text) = 0; virtual std::vector valuelist() const = 0; // 操作符重载 @@ -83,7 +83,7 @@ public: std::string to_string() const; - void from_string(const std::string text); + void from_string(const std::string& text); std::vector valuelist() const; @@ -198,7 +198,7 @@ public: std::string to_string() const; - void from_string(const std::string text); + void from_string(const std::string& text); std::vector valuelist() const; diff --git a/src/common_util/bitmap.cpp b/src/common_util/bitmap.cpp index c35024c..06a4929 100644 --- a/src/common_util/bitmap.cpp +++ b/src/common_util/bitmap.cpp @@ -4,6 +4,8 @@ #include #include #include +// #include +// #include namespace cutl { @@ -102,8 +104,8 @@ std::string bitmap::to_hex(int compress) const else { // 找到最后一个非零字节的索引 - int lastNonZeroIndex = -1; - for (size_t i = bits_.size() - 1; i >= 0; i--) + int lastNonZeroIndex = 0; + for (int i = bits_.size() - 1; i >= 0; i--) { if (bits_[i] != 0) { @@ -113,7 +115,7 @@ std::string bitmap::to_hex(int compress) const } // 如果所有字节都是零,返回空字符串 - if (lastNonZeroIndex == -1) + if (lastNonZeroIndex == 0) { return ""; } @@ -128,7 +130,7 @@ std::string bitmap::to_string() const return this->to_hex(1); } -void bitmap::from_string(const 
std::string text) +void bitmap::from_string(const std::string& text) { for (char c : text) { @@ -144,8 +146,9 @@ void bitmap::from_string(const std::string text) // 每两个字符表示一个字节 const size_t expectedLength = bits_.size() * 2; size_t strLen = std::min(expectedLength, text.length()); + size_t byteSize = strLen >> 1; // strLen / 2 // 将十六进制字符串转换为字节数组 - for (size_t i = 0; i < strLen; i++) + for (size_t i = 0; i < byteSize; i++) { std::string hexByte = text.substr(i * 2, 2); bits_[i] = std::stoi(hexByte, nullptr, 16); @@ -196,8 +199,8 @@ bool bitmap::equals(const ibitmap& other) const */ bitmap bitmap::operator&(const bitmap& other) const { - auto minSize = std::min(size_, other.size_); - bitmap result(minSize); + auto minSize = std::min(bits_.size(), other.bits_.size()); + bitmap result(minSize << 3); // minSize * 8 for (size_t i = 0; i < minSize; i++) { result.bits_[i] = bits_[i] & other.bits_[i]; @@ -212,24 +215,18 @@ bitmap bitmap::operator&(const bitmap& other) const */ bitmap bitmap::operator|(const bitmap& other) const { - CUTL_INFO("00"); - auto minSize = std::min(size_, other.size_); - auto maxSize = std::max(size_, other.size_); - - CUTL_INFO("11"); - // std::cout << "minSize: " << minSize << ", maxSize: " << maxSize << std::endl; + auto minSize = std::min(bits_.size(), other.bits_.size()); + auto maxSize = std::max(bits_.size(), other.bits_.size()); - bitmap result(maxSize); + bitmap result(maxSize << 3); // maxSize * 8 for (size_t i = 0; i < minSize; i++) { result.bits_[i] = bits_[i] | other.bits_[i]; } - CUTL_INFO("22"); for (size_t i = minSize; i < maxSize; i++) { - result.bits_[i] = size_ > other.size_ ? bits_[i] : other.bits_[i]; + result.bits_[i] = bits_.size() > other.bits_.size() ? 
bits_[i] : other.bits_[i]; } - CUTL_INFO("33"); return result; } @@ -358,7 +355,7 @@ bool roaring_bitmap::get(size_t position) const size_t bitPosition = position % block_size_; auto itr = container_.find(key); - if (itr != container_.end()) + if (itr == container_.end()) { return false; } @@ -374,12 +371,12 @@ void roaring_bitmap::reset(size_t position) size_t bitPosition = position % block_size_; auto itr = container_.find(key); - if (itr != container_.end()) + if (itr == container_.end()) { throw std::out_of_range("Position " + std::to_string(position) + " not in container"); } - itr->second.reset(position); + itr->second.reset(bitPosition); } // 将所有元素重置为0 @@ -443,9 +440,65 @@ std::string roaring_bitmap::to_string() const return result; } -void roaring_bitmap::from_string(const std::string text) +void roaring_bitmap::from_string(const std::string& text) { - // todo + // 清空现有数据 + container_.clear(); + + // 简单校验格式({...}结构) + if (text.empty() || text.front() != '{' || text.back() != '}') + { + throw std::runtime_error("Invalid roaring bitmap string format"); + } + + // 提取中间内容 + std::string content = text.substr(1, text.size() - 2); + if (content.empty()) + { + return; // 空 bitmap + } + + // 分割键值对 + size_t pos = 0; + while (pos < content.size()) + { + // 查找键的引号 + size_t keyStart = content.find('"', pos); + if (keyStart == std::string::npos) + break; + + size_t keyEnd = content.find('"', keyStart + 1); + if (keyEnd == std::string::npos) + break; + + // 解析键(block索引) + std::string keyStr = content.substr(keyStart + 1, keyEnd - keyStart - 1); + size_t key = std::stoull(keyStr); + + // 查找值的引号 + size_t valStart = content.find('"', keyEnd + 1); + if (valStart == std::string::npos) + break; + + size_t valEnd = content.find('"', valStart + 1); + if (valEnd == std::string::npos) + break; + + // 解析值(bitmap的十六进制字符串) + std::string valStr = content.substr(valStart + 1, valEnd - valStart - 1); + + // 创建对应block并从字符串加载数据 + bitmap block(block_size_); + block.from_string(valStr); + 
container_.emplace(key, block); + + // 移动到下一个键值对 + pos = valEnd + 1; + if (pos < content.size() && content[pos] == ',') + { + pos++; + } + } } std::vector roaring_bitmap::valuelist() const @@ -505,10 +558,6 @@ roaring_bitmap roaring_bitmap::operator&(const roaring_bitmap& other) const { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) { @@ -590,10 +639,6 @@ roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) { @@ -603,6 +648,7 @@ roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const if (other.container_.count(key)) { bitmap result = val ^ other.container_.at(key); + // std::cout << "key:" << key << ", result: " << result.to_string() << std::endl; rBitmap.container_.emplace(key, result); } else @@ -621,10 +667,6 @@ roaring_bitmap& roaring_bitmap::operator&=(const roaring_bitmap& other) { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } for (auto itr = container_.begin(); itr != container_.end(); itr++) { @@ -652,10 +694,6 @@ roaring_bitmap& roaring_bitmap::operator|=(const roaring_bitmap& other) { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } for (auto itr = 
container_.begin(); itr != container_.end(); itr++) { @@ -683,10 +721,6 @@ roaring_bitmap& roaring_bitmap::operator^=(const roaring_bitmap& other) { throw std::invalid_argument("RoaringBitmap must have same block_size"); } - if (container_.size() != other.container_.size()) - { - throw std::invalid_argument("RoaringBitmap must have same size"); - } for (auto itr = container_.begin(); itr != container_.end(); itr++) { diff --git a/src/common_util/strfmt.cpp b/src/common_util/strfmt.cpp index 75507e7..3ebf024 100644 --- a/src/common_util/strfmt.cpp +++ b/src/common_util/strfmt.cpp @@ -206,7 +206,7 @@ namespace cutl const char temp = data[i]; output.push_back(hex_chars[temp / 16]); output.push_back(hex_chars[temp % 16]); - if (!separator) + if (separator) { output.push_back(separator); } diff --git a/src/usage_demo/bitmap.hpp b/src/usage_demo/bitmap.hpp index 4211344..9190b8a 100644 --- a/src/usage_demo/bitmap.hpp +++ b/src/usage_demo/bitmap.hpp @@ -1,5 +1,6 @@ #include "common.hpp" #include "common_util/bitmap.h" +#include void test_bitmap() { @@ -10,41 +11,66 @@ void test_bitmap() bitmap1.set(20); bitmap1.set(99); - std::cout << "位置 10: " << bitmap1.get(10) << std::endl; - std::cout << "位置 10: " << bitmap1.get(10) << std::endl; - std::cout << "位置 25: " << bitmap1[25] << std::endl; - std::cout << "元素数量: " << bitmap1.count() << std::endl; - bitmap1.set(99); // 添加重复元素,数量保持不变 - std::cout << "元素数量: " << bitmap1.count() << std::endl; - bitmap1.reset(99); // 重置位置20的只为0,数量减1 - std::cout << "元素数量: " << bitmap1.count() << std::endl; + // 基础功能测试 + assert(bitmap1.get(10) == true); + assert(bitmap1.get(25) == false); + assert(bitmap1.count() == 3); - // 逻辑位运算 + // 重复设置测试 + bitmap1.set(99); + assert(bitmap1.count() == 3); + + // 重置测试 + bitmap1.reset(99); + assert(bitmap1.count() == 2); + + // 字符串转换测试 + std::string hexStr = bitmap1.to_string(); + std::cout << "hexStr: " << hexStr << std::endl; cutl::bitmap bitmap2(100); - bitmap2.set(20); - bitmap2.set(30); - bitmap2.set(40); - 
std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; - std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - - auto andResult = bitmap1 & bitmap2; - std::cout << "bitmap1 & bitmap2 : " << andResult.count() << std::endl; - auto orResult = bitmap1 | bitmap2; - std::cout << "bitmap1 | bitmap2 : " << orResult.count() << std::endl; + bitmap2.from_string(hexStr); + assert(bitmap2.equals(bitmap1)); + + // 位运算测试 + cutl::bitmap bitmap3(100); + bitmap3.set(20); + bitmap3.set(30); + + auto andResult = bitmap1 & bitmap3; + assert(andResult.count() == 1); // 仅20位置为1 + + auto orResult = bitmap1 | bitmap3; + assert(orResult.count() == 3); // 10,20,30 + + auto xorResult = bitmap1 ^ bitmap3; + // std::cout << "xorResult.count: " << xorResult.count() << std::endl; + assert(xorResult.count() == 2); // 10,30 + + auto notResult = ~bitmap1; + assert(notResult.get(10) == false); + assert(notResult.get(0) == true); + + std::cout << "bitmap tests passed" << std::endl; } void test_dynamic_bitmap() { - PrintSubTitle("cutl::dynamic_bitmap"); - - cutl::dynamic_bitmap dynamicBitmap; - dynamicBitmap.set(20); - dynamicBitmap.set(30); - std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() - << std::endl; - dynamicBitmap.set(80); // 自动扩容 - std::cout << "size: " << dynamicBitmap.size() << ", count: " << dynamicBitmap.count() - << std::endl; + PrintSubTitle("dynamic_bitmap"); + + cutl::dynamic_bitmap dynamicBitmap(10); + assert(dynamicBitmap.size() == 10); + + // 扩容测试 + dynamicBitmap.set(15); + assert(dynamicBitmap.size() >= 16); // 扩容为20(原10*2) + assert(dynamicBitmap.get(15) == true); + + // 更大范围扩容 + dynamicBitmap.set(100); + assert(dynamicBitmap.size() >= 101); + assert(dynamicBitmap.get(100) == true); + + std::cout << "dynamic_bitmap tests passed" << std::endl; } void test_roaring_bitmap() @@ -54,30 +80,51 @@ void test_roaring_bitmap() cutl::roaring_bitmap bitmap1(64); bitmap1.set(10); 
bitmap1.set(20); - std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; - bitmap1.set(80); - std::cout << "bitmap1 size: " << bitmap1.size() << ", count: " << bitmap1.count() << std::endl; + bitmap1.set(80); // 属于第二个block(80/64=1) + + // 基础功能测试 + assert(bitmap1.count() == 3); + assert(bitmap1.get(20) == true); + assert(bitmap1.get(80) == true); + assert(bitmap1.size() == 128); // 2个block(0和1) + // 字符串转换测试 + std::string str = bitmap1.to_string(); cutl::roaring_bitmap bitmap2(64); - bitmap2.set(20); - bitmap2.set(40); - bitmap2.set(130); - std::cout << "bitmap2 size: " << bitmap2.size() << ", count: " << bitmap2.count() << std::endl; - std::cout << "bitmap1 == bitmap2: " << (bitmap1 == bitmap2) << std::endl; - - cutl::roaring_bitmap andBitmap = bitmap1 & bitmap2; - std::cout << "andBitmap size: " << andBitmap.size() << ", count: " << andBitmap.count() - << std::endl; - cutl::roaring_bitmap orBitmap = bitmap1 | bitmap2; - std::cout << "orBitmap size: " << orBitmap.size() << ", count: " << orBitmap.count() - << std::endl; + bitmap2.from_string(str); + assert(bitmap2.equals(bitmap1)); + + // 位运算测试 + cutl::roaring_bitmap bitmap3(64); + bitmap3.set(20); + bitmap3.set(40); + bitmap3.set(130); // 130/64=2(第三个block) + + auto andBitmap = bitmap1 & bitmap3; + assert(andBitmap.count() == 1); // 仅20 + + auto orBitmap = bitmap1 | bitmap3; + assert(orBitmap.count() == 5); // 10,20,40,80,130 + + cutl::roaring_bitmap bitmap4(64); + bitmap4.set(20); + bitmap4.set(40); + bitmap4.set(80); + // bitmap1: 10,20,80 + auto xorBitmap = bitmap1 ^ bitmap4; + assert(xorBitmap.count() == 2); // 10,40 + + // 重置测试 + bitmap1.reset(20); + assert(bitmap1.count() == 2); + + std::cout << "roaring_bitmap tests passed" << std::endl; } void BitmapTest() { PrintTitle("BitmapTest"); - test_bitmap(); test_dynamic_bitmap(); test_roaring_bitmap(); -} +} \ No newline at end of file -- Gitee From 523cb09c2d987f3200faa00526a0b0dbce769f0c Mon Sep 17 00:00:00 2001 From: 
Spencer Date: Sun, 12 Oct 2025 16:59:41 +0800 Subject: [PATCH 21/40] fix: bitmap.hpp --- src/usage_demo/bitmap.hpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/usage_demo/bitmap.hpp b/src/usage_demo/bitmap.hpp index 9190b8a..d0246e4 100644 --- a/src/usage_demo/bitmap.hpp +++ b/src/usage_demo/bitmap.hpp @@ -26,7 +26,7 @@ void test_bitmap() // 字符串转换测试 std::string hexStr = bitmap1.to_string(); - std::cout << "hexStr: " << hexStr << std::endl; + std::cout << "bitmap1 str: " << hexStr << std::endl; cutl::bitmap bitmap2(100); bitmap2.from_string(hexStr); assert(bitmap2.equals(bitmap1)); @@ -36,6 +36,7 @@ void test_bitmap() bitmap3.set(20); bitmap3.set(30); + // bitmap1: 10,20 auto andResult = bitmap1 & bitmap3; assert(andResult.count() == 1); // 仅20位置为1 @@ -43,10 +44,11 @@ void test_bitmap() assert(orResult.count() == 3); // 10,20,30 auto xorResult = bitmap1 ^ bitmap3; - // std::cout << "xorResult.count: " << xorResult.count() << std::endl; assert(xorResult.count() == 2); // 10,30 auto notResult = ~bitmap1; + // std::cout << "notResult count:" << notResult.count() << std::endl; + assert(notResult.count() == 102); assert(notResult.get(10) == false); assert(notResult.get(0) == true); @@ -85,11 +87,13 @@ void test_roaring_bitmap() // 基础功能测试 assert(bitmap1.count() == 3); assert(bitmap1.get(20) == true); + assert(bitmap1.get(25) == false); assert(bitmap1.get(80) == true); assert(bitmap1.size() == 128); // 2个block(0和1) // 字符串转换测试 std::string str = bitmap1.to_string(); + std::cout << "bitmap1 str: " << str << std::endl; cutl::roaring_bitmap bitmap2(64); bitmap2.from_string(str); assert(bitmap2.equals(bitmap1)); @@ -100,6 +104,7 @@ void test_roaring_bitmap() bitmap3.set(40); bitmap3.set(130); // 130/64=2(第三个block) + // bitmap1: 10,20,80 auto andBitmap = bitmap1 & bitmap3; assert(andBitmap.count() == 1); // 仅20 @@ -116,7 +121,7 @@ void test_roaring_bitmap() // 重置测试 bitmap1.reset(20); - assert(bitmap1.count() == 2); + assert(bitmap1.count() == 
2); // 10, 80 std::cout << "roaring_bitmap tests passed" << std::endl; } -- Gitee From 9fa38605833c0785f73e420500d1fef38718044f Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 12 Oct 2025 17:24:08 +0800 Subject: [PATCH 22/40] fix: bitmap.h --- include/common_util/bitmap.h | 406 +++++++++++++++++++++++++++++++---- src/usage_demo/main.cpp | 6 +- 2 files changed, 363 insertions(+), 49 deletions(-) diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h index 21dbae9..a675b5c 100644 --- a/include/common_util/bitmap.h +++ b/include/common_util/bitmap.h @@ -1,9 +1,26 @@ -#pragma once +/** + * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. + * + * @file bitmap.h + * @brief bitmap class, include bitmap、dynamic_bitmap、roaring_bitmap. + * @author Spencer + * @date 2025-10-12 + */ + +#pragma once -// #include -// #include #include -// #include #include #include #include @@ -11,233 +28,528 @@ namespace cutl { -// ibitmap 统一接口类 +/** + * @brief Interface class for bitmap operations + * + * This abstract class defines the common interface for all bitmap implementations, + * including basic bit manipulation and conversion operations. 
+ */ class ibitmap { public: + /** + * @brief Destroy the ibitmap object + * + * Virtual destructor to ensure proper cleanup of derived classes + */ virtual ~ibitmap() = default; public: - // 基本操作 + // Basic operations + /** + * @brief Set the bit at specified position to 1 + * + * @param position The index of the bit to set + */ virtual void set(size_t position) = 0; + + /** + * @brief Get the value of the bit at specified position + * + * @param position The index of the bit to retrieve + * @return true if the bit is set (1), false otherwise (0) + */ virtual bool get(size_t position) const = 0; + + /** + * @brief Set the bit at specified position to 0 + * + * @param position The index of the bit to reset + */ virtual void reset(size_t position) = 0; + + /** + * @brief Reset all bits to 0 + */ virtual void reset() = 0; + + /** + * @brief Get the number of bits set to 1 + * + * @return size_t The count of set bits + */ virtual size_t count() const = 0; + + /** + * @brief Get the total number of bits that the bitmap can hold + * + * @return size_t The capacity of the bitmap in bits + */ virtual size_t size() const = 0; + + /** + * @brief Convert the bitmap to a string representation + * + * @return std::string String representation of the bitmap + */ virtual std::string to_string() const = 0; + + /** + * @brief Initialize the bitmap from a string representation + * + * @param text The string to parse and load into the bitmap + */ virtual void from_string(const std::string& text) = 0; + + /** + * @brief Get a list of positions where bits are set to 1 + * + * @return std::vector Vector containing indices of set bits + */ virtual std::vector valuelist() const = 0; - // 操作符重载 + // Operator overloads + /** + * @brief Overload of [] operator to get bit value + * + * @param position The index of the bit to retrieve + * @return true if the bit is set (1), false otherwise (0) + */ virtual bool operator[](size_t position) const = 0; - // 比较操作 + + // Comparison operations + 
/** + * @brief Check if two bitmaps are equal + * + * @param other The bitmap to compare with + * @return true if bitmaps are equal, false otherwise + */ virtual bool equals(const ibitmap& other) const = 0; }; +/** + * @brief A fixed-size bitmap implementation + * + * This class provides a basic bitmap with a fixed size, implementing all + * methods defined in the ibitmap interface. It stores bits in a vector of bytes. + */ class bitmap : public ibitmap { protected: - std::vector bits_; - size_t size_; + std::vector bits_; ///< Underlying storage for bits + size_t size_; ///< Total number of bits the bitmap can hold public: + /** + * @brief Construct a new bitmap object with specified size + * + * @param size The number of bits the bitmap should hold + */ bitmap(size_t size); + + /** + * @brief Destroy the bitmap object + */ ~bitmap() = default; public: /** - * 设置指定位置为 1 + * @brief Set the bit at specified position to 1 + * + * @param position The index of the bit to set + * @throw std::out_of_range If position is beyond bitmap size */ void set(size_t position) override; /** - * 获取指定位置的值 + * @brief Get the value of the bit at specified position + * + * @param position The index of the bit to retrieve + * @return true if the bit is set (1), false otherwise (0) + * @throw std::out_of_range If position is beyond bitmap size */ bool get(size_t position) const override; /** - * 重载 [] 操作符 + * @brief Overload of [] operator to get bit value + * + * @param position The index of the bit to retrieve + * @return true if the bit is set (1), false otherwise (0) */ bool operator[](size_t position) const override { return get(position); } /** - * 设置指定位置为 0 + * @brief Set the bit at specified position to 0 + * + * @param position The index of the bit to reset + * @throw std::out_of_range If position is beyond bitmap size */ void reset(size_t position) override; - // 将所有元素重置为0 + /** + * @brief Reset all bits to 0 + */ void reset() override; /** - * @brief 获取数值为1的位数 + * @brief Get 
the number of bits set to 1 * - * @return size_t + * @return size_t The count of set bits */ size_t count() const override; /** - * 获取 bitmap 的大小(bit 数) + * @brief Get the total number of bits that the bitmap can hold + * + * @return size_t The capacity of the bitmap in bits */ size_t size() const override { return size_; } + /** + * @brief Convert the bitmap to a compressed hex string representation + * + * @return std::string Compressed hex string of the bitmap + */ std::string to_string() const; + /** + * @brief Initialize the bitmap from a hex string representation + * + * @param text The hex string to parse and load + * @throw std::runtime_error If the input string contains invalid hex characters + */ void from_string(const std::string& text); + /** + * @brief Get a list of positions where bits are set to 1 + * + * @return std::vector Vector containing indices of set bits + */ std::vector valuelist() const; + /** + * @brief Check if two bitmaps are equal + * + * @param other The bitmap to compare with + * @return true if bitmaps are equal, false otherwise + */ bool equals(const ibitmap& other) const override; /** - * 判断是否相等 + * @brief Check if two bitmaps are equal + * + * @param other The bitmap to compare with + * @return true if bitmaps are equal, false otherwise */ bool operator==(const bitmap& other) const { return equals(other); } /** - * 判断是否不相等 + * @brief Check if two bitmaps are not equal + * + * @param other The bitmap to compare with + * @return true if bitmaps are not equal, false otherwise */ bool operator!=(const bitmap& other) const { return !equals(other); } /** - * 与另一个 bitmap 进行 AND 操作 + * @brief Perform bitwise AND with another bitmap + * + * @param other The bitmap to AND with + * @return bitmap Result of the AND operation */ bitmap operator&(const bitmap& other) const; /** - * 与另一个 bitmap 进行 OR 操作 + * @brief Perform bitwise OR with another bitmap + * + * @param other The bitmap to OR with + * @return bitmap Result of the OR operation */ 
bitmap operator|(const bitmap& other) const; - // 按位取反 + /** + * @brief Perform bitwise NOT operation + * + * @return bitmap Result of the NOT operation + */ bitmap operator~() const; - // 按位异或 + /** + * @brief Perform bitwise XOR with another bitmap + * + * @param other The bitmap to XOR with + * @return bitmap Result of the XOR operation + * @throw std::invalid_argument If bitmaps have different sizes + */ bitmap operator^(const bitmap& other) const; + /** + * @brief Perform bitwise AND with another bitmap and assign result + * + * @param other The bitmap to AND with + * @return bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If bitmaps have different sizes + */ bitmap& operator&=(const bitmap& other); + /** + * @brief Perform bitwise OR with another bitmap and assign result + * + * @param other The bitmap to OR with + * @return bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If bitmaps have different sizes + */ bitmap& operator|=(const bitmap& other); + /** + * @brief Perform bitwise XOR with another bitmap and assign result + * + * @param other The bitmap to XOR with + * @return bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If bitmaps have different sizes + */ bitmap& operator^=(const bitmap& other); private: /** - * 转换成 十六进制的字符串 - * @param compress 0: 不压缩,1: 压缩 + * @brief Convert the bitmap to a hexadecimal string + * + * @param compress 0 for uncompressed, 1 for compressed (truncates trailing zeros) + * @return std::string Hexadecimal string representation */ std::string to_hex(int compress = 1) const; }; +/** + * @brief A dynamically resizable bitmap implementation + * + * This class extends the basic bitmap with automatic resizing functionality, + * allowing it to grow as needed when setting bits beyond the current capacity. 
+ */ class dynamic_bitmap : public bitmap { public: + /** + * @brief Construct a new dynamic_bitmap object + * + * @param init_size Initial number of bits (default: 64) + */ dynamic_bitmap(size_t init_size = 64); + + /** + * @brief Destroy the dynamic_bitmap object + */ ~dynamic_bitmap() = default; public: /** - * 设置指定位置为 1 + * @brief Set the bit at specified position to 1, resizing if necessary + * + * @param position The index of the bit to set */ void set(size_t position) override; private: /** - * 动态扩容 + * @brief Ensure the bitmap can hold at least the specified number of bits + * + * @param minSize The minimum number of bits required */ void ensureCapacity(size_t minSize); }; +/** + * @brief A roaring bitmap implementation for efficient sparse bit storage + * + * This class uses a block-based approach to store bits, which is more memory-efficient + * for sparse bitmaps. It divides the bitmap into blocks of fixed size and only + * allocates storage for blocks that contain set bits. + */ class roaring_bitmap : public ibitmap { private: - size_t block_size_{ 0 }; - std::unordered_map container_; + size_t block_size_; ///< Size of each block in bits + std::unordered_map + container_; ///< Storage for blocks (key: block index, value: bitmap block) public: + /** + * @brief Construct a new roaring_bitmap object + * + * @param blockSize The size of each block in bits + */ roaring_bitmap(size_t blockSize); + + /** + * @brief Destroy the roaring_bitmap object + */ ~roaring_bitmap() = default; public: + /** + * @brief Get the block size + * + * @return size_t The size of each block in bits + */ size_t block_size() const { return block_size_; } /** - * 设置指定位置为 1 + * @brief Set the bit at specified position to 1 + * + * Creates the necessary block if it doesn't exist + * @param position The index of the bit to set */ void set(size_t position) override; /** - * 获取指定位置的值 + * @brief Get the value of the bit at specified position + * + * @param position The index of the bit to 
retrieve + * @return true if the bit is set (1), false otherwise (0) */ bool get(size_t position) const override; /** - * 重载 [] 操作符 + * @brief Overload of [] operator to get bit value + * + * @param position The index of the bit to retrieve + * @return true if the bit is set (1), false otherwise (0) */ bool operator[](size_t position) const override { return get(position); } /** - * 设置指定位置为 0 + * @brief Set the bit at specified position to 0 + * + * @param position The index of the bit to reset + * @throw std::out_of_range If the position's block doesn't exist */ void reset(size_t position) override; - // 将所有元素重置为0 + /** + * @brief Reset all bits to 0 + */ void reset() override; /** - * @brief 获取数值为1的位数 + * @brief Get the number of bits set to 1 * - * @return size_t + * @return size_t The count of set bits */ size_t count() const override; /** - * 获取 bitmap 的大小(bit 数) + * @brief Get the total number of bits across all blocks + * + * @return size_t The total capacity of the bitmap in bits */ size_t size() const override; + /** + * @brief Convert the roaring bitmap to a string representation + * + * Format: {"block_index":"hex_data", "block_index":"hex_data", ...} + * @return std::string String representation of the roaring bitmap + */ std::string to_string() const; + /** + * @brief Initialize the roaring bitmap from a string representation + * + * @param text The string to parse and load + * @throw std::runtime_error If the input string has invalid format + */ void from_string(const std::string& text); + /** + * @brief Get a list of positions where bits are set to 1 + * + * @return std::vector Vector containing indices of set bits + */ std::vector valuelist() const; /** - * 判断是否相等 + * @brief Check if two roaring bitmaps are equal + * + * @param other The roaring bitmap to compare with + * @return true if bitmaps are equal, false otherwise */ - // bool operator==(const roaring_bitmap& other) const bool equals(const ibitmap& other) const override; + /** + * @brief 
Check if two roaring bitmaps are equal + * + * @param other The roaring bitmap to compare with + * @return true if bitmaps are equal, false otherwise + */ bool operator==(const roaring_bitmap& other) const { return equals(other); } + + /** + * @brief Check if two roaring bitmaps are not equal + * + * @param other The roaring bitmap to compare with + * @return true if bitmaps are not equal, false otherwise + */ bool operator!=(const roaring_bitmap& other) const { return !equals(other); } /** - * 与另一个 bitmap 进行 AND 操作 + * @brief Perform bitwise AND with another roaring bitmap + * + * @param other The roaring bitmap to AND with + * @return roaring_bitmap Result of the AND operation + * @throw std::invalid_argument If block sizes differ */ roaring_bitmap operator&(const roaring_bitmap& other) const; /** - * 与另一个 bitmap 进行 OR 操作 + * @brief Perform bitwise OR with another roaring bitmap + * + * @param other The roaring bitmap to OR with + * @return roaring_bitmap Result of the OR operation + * @throw std::invalid_argument If block sizes differ */ roaring_bitmap operator|(const roaring_bitmap& other) const; /** - * 与另一个 bitmap 进行 Not 操作(按位取反) + * @brief Perform bitwise NOT operation + * + * @return roaring_bitmap Result of the NOT operation */ roaring_bitmap operator~() const; /** - * 与另一个 bitmap 进行 异或 操作 + * @brief Perform bitwise XOR with another roaring bitmap + * + * @param other The roaring bitmap to XOR with + * @return roaring_bitmap Result of the XOR operation + * @throw std::invalid_argument If block sizes differ or blocks are missing */ roaring_bitmap operator^(const roaring_bitmap& other) const; - // 与 + /** + * @brief Perform bitwise AND with another roaring bitmap and assign result + * + * @param other The roaring bitmap to AND with + * @return roaring_bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If block sizes differ or blocks are missing + */ roaring_bitmap& operator&=(const roaring_bitmap& other); - // 或 + /** + * 
@brief Perform bitwise OR with another roaring bitmap and assign result + * + * @param other The roaring bitmap to OR with + * @return roaring_bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If block sizes differ or blocks are missing + */ roaring_bitmap& operator|=(const roaring_bitmap& other); - // 异或 + /** + * @brief Perform bitwise XOR with another roaring bitmap and assign result + * + * @param other The roaring bitmap to XOR with + * @return roaring_bitmap& Reference to this bitmap after operation + * @throw std::invalid_argument If block sizes differ or blocks are missing + */ roaring_bitmap& operator^=(const roaring_bitmap& other); }; diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 5196311..5ec8c3c 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -1,4 +1,5 @@ #include "algoutil.hpp" +#include "bitmap.hpp" #include "common.hpp" #include "config.hpp" #include "datetime.hpp" @@ -39,20 +40,21 @@ int main(int argc, char* argv[]) // TestSysutil(); // TestStrfmt(); // TestTimeutil(); - TestTimecount(); + // TestTimecount(); // TestDatetime(); // TestVerUtil(); // TestStrUtil(); // TestFilePath(); // TestFileUtil(); // TestDlLoader(); - TestPrint(); + // TestPrint(); // TestTimer(); // TestLRUCache(); // TestThreadUtil(); // TestEventLoop(); // TestThreadPool(); // TestAlgorithmUtil(); + BitmapTest(); // usage_demo(); -- Gitee From 8772580ade6f92970f54488e63a3538729677e66 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sun, 12 Oct 2025 21:12:25 +0800 Subject: [PATCH 23/40] fix: bitmap --- include/common_util/bitmap.h | 27 +++++++ src/common_util/bitmap.cpp | 152 ++++++++++++++++++++++++++++++----- src/usage_demo/bitmap.hpp | 131 +++++++++++++++++++++++++++++- 3 files changed, 289 insertions(+), 21 deletions(-) diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h index a675b5c..b6b58fd 100644 --- a/include/common_util/bitmap.h +++ b/include/common_util/bitmap.h @@ -349,6 
+349,33 @@ public: */ void set(size_t position) override; + /** + * @brief Perform bitwise AND with another dynamic_bitmap and assign result + * + * @param other The dynamic_bitmap to AND with + * @return dynamic_bitmap& Reference to this bitmap after operation + * @note Operands may differ in size; storage of this bitmap beyond the other's is cleared + */ + dynamic_bitmap& operator&=(const dynamic_bitmap& other); + + /** + * @brief Perform bitwise OR with another dynamic_bitmap and assign result + * + * @param other The dynamic_bitmap to OR with + * @return dynamic_bitmap& Reference to this bitmap after operation + * @note Operands may differ in size; this bitmap grows when the other has more storage + */ + dynamic_bitmap& operator|=(const dynamic_bitmap& other); + + /** + * @brief Perform bitwise XOR with another dynamic_bitmap and assign result + * + * @param other The dynamic_bitmap to XOR with + * @return dynamic_bitmap& Reference to this bitmap after operation + * @note Operands may differ in size; this bitmap grows when the other has more storage + */ + dynamic_bitmap& operator^=(const dynamic_bitmap& other); + private: /** * @brief Ensure the bitmap can hold at least the specified number of bits diff --git a/src/common_util/bitmap.cpp b/src/common_util/bitmap.cpp index 06a4929..5042787 100644 --- a/src/common_util/bitmap.cpp +++ b/src/common_util/bitmap.cpp @@ -2,10 +2,10 @@ #include "inner/logger.h" #include "strfmt.h" #include -#include -#include // #include +#include // #include +#include namespace cutl { @@ -38,6 +38,7 @@ void bitmap::set(size_t position) */ bool bitmap::get(size_t position) const { + // printf("position:%d, size_:%d\n", position, size_); if (position >= size_) { throw std::out_of_range("Position " + std::to_string(position) + " out of range"); @@ -208,8 +209,6 @@ bitmap bitmap::operator&(const bitmap& other) const return result; } -// #include - /** * 与另一个 bitmap 进行 OR 操作 */ @@ -242,17 +241,20 @@ bitmap bitmap::operator~() const return result; } -// 按位异或 +// 按位异或,“异或”对应于集合中的“对称差”, A异或B = (A\B) U
(B\A). bitmap bitmap::operator^(const bitmap& other) const { - if (size_ != other.size_) + auto minSize = std::min(bits_.size(), other.bits_.size()); + auto maxSize = std::max(bits_.size(), other.bits_.size()); + + bitmap result(maxSize << 3); // maxSize * 8 + for (size_t i = 0; i < minSize; i++) { - throw std::invalid_argument("Bitmaps must have same size"); + result.bits_[i] = bits_[i] ^ other.bits_[i]; } - bitmap result(size_); - for (size_t i = 0; i < bits_.size(); i++) + for (size_t i = minSize; i < maxSize; i++) { - result.bits_[i] = bits_[i] ^ other.bits_[i]; + result.bits_[i] = bits_.size() > other.bits_.size() ? bits_[i] : other.bits_[i]; } return result; } @@ -325,6 +327,79 @@ void dynamic_bitmap::ensureCapacity(size_t minSize) bits_.resize(size_); } +dynamic_bitmap& dynamic_bitmap::operator&=(const dynamic_bitmap& other) +{ + auto minSize = std::min(bits_.size(), other.bits_.size()); + auto maxSize = std::max(bits_.size(), other.bits_.size()); + + bitmap result(maxSize << 3); // maxSize * 8 + for (size_t i = 0; i < minSize; i++) + { + bits_[i] &= other.bits_[i]; + } + if (bits_.size() > other.bits_.size()) + { + // 将剩余的元素填充为0 + std::fill(bits_.begin() + minSize, bits_.end(), 0); + } + + return *this; +} + +dynamic_bitmap& dynamic_bitmap::operator|=(const dynamic_bitmap& other) +{ + auto minSize = std::min(bits_.size(), other.bits_.size()); + auto maxSize = std::max(bits_.size(), other.bits_.size()); + + for (size_t i = 0; i < minSize; i++) + { + bits_[i] |= other.bits_[i]; + } + if (bits_.size() < other.bits_.size()) + { + ensureCapacity(maxSize); + for (size_t i = minSize; i < maxSize; i++) + { + bits_[i] = other.bits_[i]; + } + } + else if (size_ < bits_.size() << 3) + { + // bits_.size()相同时,size_可能不相同 + // bits_.size() * 8 + size_ = bits_.size() << 3; + } + + return *this; +} + +dynamic_bitmap& dynamic_bitmap::operator^=(const dynamic_bitmap& other) +{ + auto minSize = std::min(bits_.size(), other.bits_.size()); + auto maxSize = std::max(bits_.size(), 
other.bits_.size()); + + for (size_t i = 0; i < minSize; i++) + { + bits_[i] ^= other.bits_[i]; + } + if (bits_.size() < other.bits_.size()) + { + ensureCapacity(maxSize); + for (size_t i = minSize; i < maxSize; i++) + { + bits_[i] = other.bits_[i]; + } + } + else if (size_ < bits_.size() << 3) + { + // bits_.size()相同时,size_可能不相同 + // bits_.size() * 8 + size_ = bits_.size() << 3; + } + + return *this; +} + roaring_bitmap::roaring_bitmap(size_t blockSize) : block_size_(blockSize) { @@ -514,6 +589,11 @@ std::vector roaring_bitmap::valuelist() const for (const auto& key : keys) { auto vec = container_.at(key).valuelist(); + // 根据分块的大小,计算原始值的大小 + for (int i = 0; i < vec.size(); i++) + { + vec[i] = key * block_size_ + vec[i]; + } result.insert(result.end(), vec.begin(), vec.end()); } return result; @@ -653,7 +733,17 @@ roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const } else { - throw std::invalid_argument("Key " + std::to_string(key) + " not in other container"); + // 属于this,但是不属于other的,也添加到result里 + rBitmap.container_.emplace(key, val); + } + } + for (auto itr = other.container_.begin(); itr != other.container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!rBitmap.container_.count(key)) + { + rBitmap.container_.emplace(key, val); } } @@ -668,19 +758,31 @@ roaring_bitmap& roaring_bitmap::operator&=(const roaring_bitmap& other) throw std::invalid_argument("RoaringBitmap must have same block_size"); } - for (auto itr = container_.begin(); itr != container_.end(); itr++) + // 使用迭代器遍历,通过 erase 的返回值更新迭代器 + auto itr = container_.begin(); + while (itr != container_.end()) { auto& key = itr->first; auto& val = itr->second; if (other.container_.count(key)) { + // 两个 bitmap 都有该块,执行 AND 操作 val &= other.container_.at(key); + // 如果 AND 后块中没有置位,删除该块 + if (val.count() == 0) + { + itr = container_.erase(itr); // 安全删除,获取下一个迭代器 + } + else + { + ++itr; // 继续下一个元素 + } } else { - std::string errMsg = "Key " + std::to_string(key) + " 
not in other container."; - throw std::invalid_argument(errMsg); + // 另一个 bitmap 没有该块,直接删除 + itr = container_.erase(itr); // 安全删除,获取下一个迭代器 } } @@ -704,10 +806,15 @@ roaring_bitmap& roaring_bitmap::operator|=(const roaring_bitmap& other) { val |= other.container_.at(key); } - else + } + + for (auto itr = other.container_.begin(); itr != other.container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!container_.count(key)) { - std::string errMsg = "Key " + std::to_string(key) + " not in other container."; - throw std::invalid_argument(errMsg); + container_.emplace(key, val); } } @@ -731,10 +838,15 @@ roaring_bitmap& roaring_bitmap::operator^=(const roaring_bitmap& other) { val ^= other.container_.at(key); } - else + } + + for (auto itr = other.container_.begin(); itr != other.container_.end(); itr++) + { + auto& key = itr->first; + auto& val = itr->second; + if (!container_.count(key)) { - std::string errMsg = "Key " + std::to_string(key) + " not in other container."; - throw std::invalid_argument(errMsg); + container_.emplace(key, val); } } diff --git a/src/usage_demo/bitmap.hpp b/src/usage_demo/bitmap.hpp index d0246e4..1fe1b75 100644 --- a/src/usage_demo/bitmap.hpp +++ b/src/usage_demo/bitmap.hpp @@ -1,5 +1,6 @@ #include "common.hpp" #include "common_util/bitmap.h" +#include // 用于测试valuelist排序 #include void test_bitmap() @@ -47,11 +48,39 @@ void test_bitmap() assert(xorResult.count() == 2); // 10,30 auto notResult = ~bitmap1; - // std::cout << "notResult count:" << notResult.count() << std::endl; assert(notResult.count() == 102); assert(notResult.get(10) == false); assert(notResult.get(0) == true); + // 新增:测试operator&= + cutl::bitmap andAssign(100); + andAssign.set(10); + andAssign.set(20); + andAssign.set(30); + andAssign &= bitmap3; // bitmap3:20,30 + assert(andAssign.count() == 2); + assert(andAssign.get(20) && andAssign.get(30) && !andAssign.get(10)); + + // 新增:测试operator|= + cutl::bitmap orAssign(100); + orAssign.set(10); + 
orAssign |= bitmap3; // bitmap3:20,30 + assert(orAssign.count() == 3); + assert(orAssign.get(10) && orAssign.get(20) && orAssign.get(30)); + + // 新增:测试operator^= + cutl::bitmap xorAssign(100); + xorAssign.set(10); + xorAssign.set(20); + xorAssign ^= bitmap3; // bitmap3:20,30 + assert(xorAssign.count() == 2); + assert(xorAssign.get(10) && xorAssign.get(30) && !xorAssign.get(20)); + + // 新增:测试valuelist() + std::vector expected = { 10, 20 }; + std::vector actual = bitmap1.valuelist(); + assert(actual == expected); + std::cout << "bitmap tests passed" << std::endl; } @@ -72,6 +101,56 @@ void test_dynamic_bitmap() assert(dynamicBitmap.size() >= 101); assert(dynamicBitmap.get(100) == true); + // 新增:测试operator&= + cutl::dynamic_bitmap dbAnd1(20); + dbAnd1.set(5); + dbAnd1.set(10); + dbAnd1.set(15); + + cutl::dynamic_bitmap dbAnd2(20); + dbAnd2.set(10); + dbAnd2.set(15); + dbAnd2.set(20); + + dbAnd1 &= dbAnd2; + assert(dbAnd1.count() == 2); + assert(dbAnd1.get(10) && dbAnd1.get(15) && !dbAnd1.get(5)); + assert(dbAnd1.size() != dbAnd2.size()); + + // 新增:测试operator|= + cutl::dynamic_bitmap dbOr1(10); + dbOr1.set(3); + dbOr1.set(7); + + cutl::dynamic_bitmap dbOr2(15); + dbOr2.set(7); + dbOr2.set(12); + + dbOr1 |= dbOr2; + assert(dbOr1.count() == 3); + assert(dbOr1.get(3) && dbOr1.get(7) && dbOr1.get(12)); + assert(dbOr1.size() >= 15); + + // 新增:测试operator^= + cutl::dynamic_bitmap dbXor1(20); + dbXor1.set(4); + dbXor1.set(8); + dbXor1.set(12); + + cutl::dynamic_bitmap dbXor2(20); + dbXor2.set(8); + dbXor2.set(16); + + dbXor1 ^= dbXor2; + assert(dbXor1.count() == 3); + assert(dbXor1.get(4) && dbXor1.get(12) && dbXor1.get(16) && !dbXor1.get(8)); + + // 新增:测试valuelist() + dbXor1.set(20); + std::vector expected = { 4, 12, 16, 20 }; + std::vector actual = dbXor1.valuelist(); + assert(actual == expected); + std::cout << "dynamic_bitmap tests passed" << std::endl; } @@ -123,6 +202,56 @@ void test_roaring_bitmap() bitmap1.reset(20); assert(bitmap1.count() == 2); // 10, 80 + // 
新增:测试operator&= + cutl::roaring_bitmap rbAnd1(64); + rbAnd1.set(10); + rbAnd1.set(80); + rbAnd1.set(130); + + cutl::roaring_bitmap rbAnd2(64); + rbAnd2.set(80); + rbAnd2.set(130); + rbAnd2.set(150); + + rbAnd1 &= rbAnd2; + assert(rbAnd1.count() == 2); + assert(rbAnd1.get(80) && rbAnd1.get(130) && !rbAnd1.get(10)); + + // 新增:测试operator|= + cutl::roaring_bitmap rbOr1(64); + rbOr1.set(10); + rbOr1.set(80); + + cutl::roaring_bitmap rbOr2(64); + rbOr2.set(80); + rbOr2.set(130); + + rbOr1 |= rbOr2; + assert(rbOr1.count() == 3); + assert(rbOr1.get(10) && rbOr1.get(80) && rbOr1.get(130)); + + // 新增:测试operator^= + cutl::roaring_bitmap rbXor1(64); + rbXor1.set(10); + rbXor1.set(80); + rbXor1.set(130); + + cutl::roaring_bitmap rbXor2(64); + rbXor2.set(80); + rbXor2.set(150); + + rbXor1 ^= rbXor2; + assert(rbXor1.count() == 3); + // 10, 130, 150 + assert(rbXor1.get(10) && rbXor1.get(130) && rbXor1.get(150) && !rbXor1.get(80)); + + // 新增:测试valuelist() + std::vector expected = { 10, 130, 150 }; + std::vector actual = rbXor1.valuelist(); + std::cout << "rbXor1 valuelist: " << cutl::fmt_vec(actual) << std::endl; + std::sort(actual.begin(), actual.end()); // 排序后比较 + assert(actual == expected); + std::cout << "roaring_bitmap tests passed" << std::endl; } -- Gitee From 87a971762a35f7dbac1b737e41ff8fb799a4ffa5 Mon Sep 17 00:00:00 2001 From: Spencer Date: Mon, 13 Oct 2025 12:34:21 +0800 Subject: [PATCH 24/40] feat: add hash.cpp --- hash.cpp | 294 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 hash.cpp diff --git a/hash.cpp b/hash.cpp new file mode 100644 index 0000000..0d1f9e1 --- /dev/null +++ b/hash.cpp @@ -0,0 +1,294 @@ +#include +#include +#include + +// DJB2哈希算法 +uint32_t DJB2(const std::string& str) +{ + // 5381: 经过大量测试选择的质数 + uint32_t djb2 = 5381; + for (char c : str) + { + djb2 = ((djb2 << 5) + djb2) + c; // DJB2 * 33 + c + } + return djb2; +} + +static const uint32_t FNV_OFFSET_BASIS = 2166136261u; +static const 
uint32_t FNV_PRIME = 16777619u; + +// FNV-1 32位版本 +uint32_t fnv1_32(const std::string& str) +{ + uint32_t hash = FNV_OFFSET_BASIS; + for (char c : str) + { + hash = (hash * FNV_PRIME) ^ c; + } + return hash; +} + +// FNV-1a 32位版本(推荐使用) +uint32_t fnv1a_32(const std::string& str) +{ + uint32_t hash = FNV_OFFSET_BASIS; + for (char c : str) + { + hash = (hash ^ c) * FNV_PRIME; + } + return hash; +} + +static const uint64_t FNV_OFFSET_BASIS_64 = 14695981039346656037u; +static const uint64_t FNV_PRIME_64 = 1099511628211u; + +// FNV-1a 64位版本 +uint64_t fnv1a_64(const std::string& str) +{ + uint64_t hash = FNV_OFFSET_BASIS_64; + for (char c : str) + { + hash = (hash ^ static_cast(c)) * FNV_PRIME_64; + } + return hash; +} + +// Jenkins one_at_a_time 哈希(简单但高质量) +uint32_t one_at_a_time(const std::string& str) +{ + uint32_t hash = 0; + + for (char c : str) + { + hash += static_cast(c); + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash; +} + +// Jenkins lookup3 哈希(更复杂的Jenkins哈希) +uint32_t lookup3(const void* key, size_t length, uint32_t initval = 0) +{ + uint32_t a, b, c; + const uint8_t* k = static_cast(key); + + // 设置初始值 + a = b = c = 0xdeadbeef + static_cast(length) + initval; + + // 主要哈希循环 + while (length > 12) + { + a += k[0] + (static_cast(k[1]) << 8) + (static_cast(k[2]) << 16) + + (static_cast(k[3]) << 24); + b += k[4] + (static_cast(k[5]) << 8) + (static_cast(k[6]) << 16) + + (static_cast(k[7]) << 24); + c += k[8] + (static_cast(k[9]) << 8) + (static_cast(k[10]) << 16) + + (static_cast(k[11]) << 24); + + // 混合函数 + a -= c; + a ^= ((c << 4) | (c >> 28)); + c += b; + b -= a; + b ^= ((a << 6) | (a >> 26)); + a += c; + c -= b; + c ^= ((b << 8) | (b >> 24)); + b += a; + a -= c; + a ^= ((c << 16) | (c >> 16)); + c += b; + b -= a; + b ^= ((a << 19) | (a >> 13)); + a += c; + c -= b; + c ^= ((b << 4) | (b >> 28)); + b += a; + + k += 12; + length -= 12; + } + + // 处理最后1-12个字节 + switch 
(length) + { + case 12: + c += (static_cast(k[11]) << 24); + [[fallthrough]]; + case 11: + c += (static_cast(k[10]) << 16); + [[fallthrough]]; + case 10: + c += (static_cast(k[9]) << 8); + [[fallthrough]]; + case 9: + c += k[8]; + [[fallthrough]]; + case 8: + b += (static_cast(k[7]) << 24); + [[fallthrough]]; + case 7: + b += (static_cast(k[6]) << 16); + [[fallthrough]]; + case 6: + b += (static_cast(k[5]) << 8); + [[fallthrough]]; + case 5: + b += k[4]; + [[fallthrough]]; + case 4: + a += (static_cast(k[3]) << 24); + [[fallthrough]]; + case 3: + a += (static_cast(k[2]) << 16); + [[fallthrough]]; + case 2: + a += (static_cast(k[1]) << 8); + [[fallthrough]]; + case 1: + a += k[0]; + break; + case 0: + return c; + } + + // 最终混合 + c ^= b; + c -= ((b << 14) | (b >> 18)); + a ^= c; + a -= ((c << 11) | (c >> 21)); + b ^= a; + b -= ((a << 25) | (a >> 7)); + c ^= b; + c -= ((b << 16) | (b >> 16)); + a ^= c; + a -= ((c << 4) | (c >> 28)); + b ^= a; + b -= ((a << 14) | (a >> 18)); + c ^= b; + c -= ((b << 24) | (b >> 8)); + + return c; +} + +// MurmurHash3 32位版本 +uint32_t murmur3_32(const void* key, size_t len, uint32_t seed = 0) +{ + const uint8_t* data = static_cast(key); + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + // 主体处理 + const uint32_t* blocks = reinterpret_cast(data + nblocks * 4); + for (int i = -nblocks; i; i++) + { + uint32_t k1 = blocks[i]; + + k1 *= c1; + k1 = (k1 << 15) | (k1 >> 17); + k1 *= c2; + + h1 ^= k1; + h1 = (h1 << 13) | (h1 >> 19); + h1 = h1 * 5 + 0xe6546b64; + } + + // 尾部处理 + const uint8_t* tail = data + nblocks * 4; + uint32_t k1 = 0; + + switch (len & 3) + { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = (k1 << 15) | (k1 >> 17); + k1 *= c2; + h1 ^= k1; + } + + // 最终混合 + h1 ^= len; + h1 ^= h1 >> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 
>> 16; + + return h1; +} + +void TestDJB2() +{ + std::string str1("Hello World"); + std::string str2("我爱中国"); + std::cout << str1 << " --> " << DJB2(str1) << std::endl; + std::cout << str2 << " --> " << DJB2(str2) << std::endl; +} + +void TestFNV1() +{ + std::string str1("Hello World"); + std::string str2("我爱中国"); + + std::cout << "fnv1_32:" << std::endl; + std::cout << str1 << " --> " << fnv1_32(str1) << std::endl; + std::cout << str2 << " --> " << fnv1_32(str2) << std::endl; + + std::cout << "fnv1a_32:" << std::endl; + std::cout << str1 << " --> " << fnv1a_32(str1) << std::endl; + std::cout << str2 << " --> " << fnv1a_32(str2) << std::endl; + + std::cout << "fnv1a_64:" << std::endl; + std::cout << str1 << " --> " << fnv1a_64(str1) << std::endl; + std::cout << str2 << " --> " << fnv1a_64(str2) << std::endl; +} + +void TestJenkins() +{ + std::string str1("Hello World"); + std::string str2("我爱中国"); + + std::cout << "one_at_a_time:" << std::endl; + std::cout << str1 << " --> " << one_at_a_time(str1) << std::endl; + std::cout << str2 << " --> " << one_at_a_time(str2) << std::endl; + + std::cout << "lookup3:" << std::endl; + std::cout << str1 << " --> " << lookup3(str1.c_str(), str1.length()) << std::endl; + std::cout << str2 << " --> " << lookup3(str2.c_str(), str2.length()) << std::endl; +} + +void TestMurmurHash() +{ + std::string str1("Hello World"); + std::string str2("我爱中国"); + + std::cout << "murmur3_32:" << std::endl; + std::cout << str1 << " --> " << murmur3_32(str1.c_str(), str1.length()) << std::endl; + std::cout << str2 << " --> " << murmur3_32(str2.c_str(), str2.length()) << std::endl; +} + +int main() +{ + TestDJB2(); + TestFNV1(); + TestJenkins(); + TestMurmurHash(); + return 0; +} \ No newline at end of file -- Gitee From afa06895c8bff6bc819ceb8b8d9f5dafe22db851 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 16:02:01 +0800 Subject: [PATCH 25/40] fix: hash.cpp --- hash.cpp | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 
file changed, 75 insertions(+), 8 deletions(-) diff --git a/hash.cpp b/hash.cpp index 0d1f9e1..8f91155 100644 --- a/hash.cpp +++ b/hash.cpp @@ -2,6 +2,17 @@ #include #include +// 多项式滚动哈希: Polynomial rolling_hash +uint32_t polynomial_rolling(const std::string& str) +{ + int h = 0; + for (int i = 0; i < str.length(); i++) + { + h = 31 * h + str[i]; + } + return h; +} + // DJB2哈希算法 uint32_t DJB2(const std::string& str) { @@ -234,18 +245,53 @@ uint32_t murmur3_32(const void* key, size_t len, uint32_t seed = 0) return h1; } +// Thomas Wang的整数哈希函数 +uint32_t thomas_wang(uint32_t key) +{ + key = ~key + (key << 15); + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key * 2057; + key = key ^ (key >> 16); + return key; +} + +// 乘法哈希(适用于哈希表) +uint32_t multiplicative_hash(uint32_t key, uint32_t table_size) +{ + const double A = 0.6180339887; // 黄金比例的分数部分 + double product = key * A; + double fractional = product - static_cast(product); + return static_cast(table_size * fractional); +} + +// 除法哈希 +uint32_t division_hash(uint32_t key, uint32_t table_size) +{ + return key % table_size; +} + +void TestPolynomialRolling() +{ + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + std::cout << str1 << " --> " << polynomial_rolling(str1) << std::endl; + std::cout << str2 << " --> " << polynomial_rolling(str2) << std::endl; +} + void TestDJB2() { - std::string str1("Hello World"); - std::string str2("我爱中国"); + std::string str1("Hello World!"); + std::string str2("我爱中国!"); std::cout << str1 << " --> " << DJB2(str1) << std::endl; std::cout << str2 << " --> " << DJB2(str2) << std::endl; } void TestFNV1() { - std::string str1("Hello World"); - std::string str2("我爱中国"); + std::string str1("Hello World!"); + std::string str2("我爱中国!"); std::cout << "fnv1_32:" << std::endl; std::cout << str1 << " --> " << fnv1_32(str1) << std::endl; @@ -262,8 +308,8 @@ void TestFNV1() void TestJenkins() { - std::string str1("Hello World"); - std::string 
str2("我爱中国"); + std::string str1("Hello World!"); + std::string str2("我爱中国!"); std::cout << "one_at_a_time:" << std::endl; std::cout << str1 << " --> " << one_at_a_time(str1) << std::endl; @@ -276,19 +322,40 @@ void TestJenkins() void TestMurmurHash() { - std::string str1("Hello World"); - std::string str2("我爱中国"); + std::string str1("Hello World!"); + std::string str2("我爱中国!"); std::cout << "murmur3_32:" << std::endl; std::cout << str1 << " --> " << murmur3_32(str1.c_str(), str1.length()) << std::endl; std::cout << str2 << " --> " << murmur3_32(str2.c_str(), str2.length()) << std::endl; } +void TestIntHash() +{ + uint32_t a = 6; + uint32_t b = 127; + + std::cout << "thomas_wang:" << std::endl; + std::cout << a << " --> " << thomas_wang(a) << std::endl; + std::cout << b << " --> " << thomas_wang(b) << std::endl; + + std::cout << "multiplicative_hash:" << std::endl; + std::cout << a << " --> " << multiplicative_hash(a, 20) << std::endl; + std::cout << b << " --> " << multiplicative_hash(b, 20) << std::endl; + + std::cout << "division_hash:" << std::endl; + std::cout << a << " --> " << division_hash(a, 20) << std::endl; + std::cout << b << " --> " << division_hash(b, 20) << std::endl; +} + int main() { + TestPolynomialRolling(); TestDJB2(); TestFNV1(); TestJenkins(); TestMurmurHash(); + TestIntHash(); + return 0; } \ No newline at end of file -- Gitee From 7a0fb13d8ae571afcf7f883be03e2e26e52815b7 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 16:38:56 +0800 Subject: [PATCH 26/40] fix: hash.cpp --- hash.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hash.cpp b/hash.cpp index 8f91155..55c9a98 100644 --- a/hash.cpp +++ b/hash.cpp @@ -348,6 +348,17 @@ void TestIntHash() std::cout << b << " --> " << division_hash(b, 20) << std::endl; } +void TestStdHash() +{ + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + + std::cout << "std::hash():" << std::endl; + std::hash hasher; + std::cout << str1 << " --> " << 
hasher(str1.c_str()) << std::endl; + std::cout << str2 << " --> " << hasher(str2.c_str()) << std::endl; +} + int main() { TestPolynomialRolling(); @@ -356,6 +367,7 @@ int main() TestJenkins(); TestMurmurHash(); TestIntHash(); + TestStdHash(); return 0; } \ No newline at end of file -- Gitee From eb9a2e028cd80dc75fe716c9fadc62f1674ff201 Mon Sep 17 00:00:00 2001 From: "spencer.luo" Date: Wed, 15 Oct 2025 18:57:42 +0800 Subject: [PATCH 27/40] fix: hash.cpp --- hash.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hash.cpp b/hash.cpp index 55c9a98..fc700d0 100644 --- a/hash.cpp +++ b/hash.cpp @@ -370,4 +370,9 @@ int main() TestStdHash(); return 0; -} \ No newline at end of file +} + +// macOS clang version 16.0.0 +// std::hash(): +// Hello World! --> 18476351241006313 +// 我爱中国! --> 16995802077979883537 \ No newline at end of file -- Gitee From 68f9e2d3b1f5aa5dee1b3f5c996b0c59b26f9b22 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 20:37:44 +0800 Subject: [PATCH 28/40] fix: hash.cpp --- hash.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/hash.cpp b/hash.cpp index fc700d0..aa3034c 100644 --- a/hash.cpp +++ b/hash.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -245,6 +246,70 @@ uint32_t murmur3_32(const void* key, size_t len, uint32_t seed = 0) return h1; } +uint64_t murmur3_64(const void* key, size_t len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995ULL; + const int r = 47; + + const uint8_t* data = static_cast(key); + const uint8_t* end = data + (len / 8) * 8; + + uint64_t h = seed ^ (len * m); + + // 处理8字节块 + while (data != end) + { + uint64_t k; + std::memcpy(&k, data, sizeof(k)); + data += sizeof(k); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + // 处理尾部字节 (0-7字节) + switch (len & 7) + { + case 7: + h ^= static_cast(data[6]) << 48; + [[fallthrough]]; + case 6: + h ^= static_cast(data[5]) << 40; + [[fallthrough]]; + case 
5: + h ^= static_cast(data[4]) << 32; + [[fallthrough]]; + case 4: + h ^= static_cast(data[3]) << 24; + [[fallthrough]]; + case 3: + h ^= static_cast(data[2]) << 16; + [[fallthrough]]; + case 2: + h ^= static_cast(data[1]) << 8; + [[fallthrough]]; + case 1: + h ^= static_cast(data[0]); + h *= m; + } + + // 最终混合 + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +uint64_t murmur3_64(const std::string& str, uint64_t seed = 0) +{ + return murmur3_64(str.data(), str.length(), seed); +} + // Thomas Wang的整数哈希函数 uint32_t thomas_wang(uint32_t key) { @@ -328,6 +393,10 @@ void TestMurmurHash() std::cout << "murmur3_32:" << std::endl; std::cout << str1 << " --> " << murmur3_32(str1.c_str(), str1.length()) << std::endl; std::cout << str2 << " --> " << murmur3_32(str2.c_str(), str2.length()) << std::endl; + + std::cout << "murmur3_64:" << std::endl; + std::cout << str1 << " --> " << murmur3_64(str1.c_str(), str1.length()) << std::endl; + std::cout << str2 << " --> " << murmur3_64(str2.c_str(), str2.length()) << std::endl; } void TestIntHash() -- Gitee From 839fe2bdcb9062946f70b2c75fad01ba99c6ba7c Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 20:59:17 +0800 Subject: [PATCH 29/40] fix: hash --- include/common_util/hash.h | 46 +++++ src/common_util/hash.cpp | 343 +++++++++++++++++++++++++++++++++++++ src/usage_demo/hash.hpp | 124 ++++++++++++++ src/usage_demo/main.cpp | 2 + 4 files changed, 515 insertions(+) create mode 100644 include/common_util/hash.h create mode 100644 src/common_util/hash.cpp create mode 100644 src/usage_demo/hash.hpp diff --git a/include/common_util/hash.h b/include/common_util/hash.h new file mode 100644 index 0000000..77609ad --- /dev/null +++ b/include/common_util/hash.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include + +namespace cutl +{ + +// 多项式滚动哈希: Polynomial rolling_hash +uint32_t hash_polynomial_rolling(const std::string& str); + +// DJB2哈希算法 +uint32_t hash_djb2(const std::string& str); + +// FNV-1 32位版本 +uint32_t 
hash_fnv1_32(const std::string& str); + +// FNV-1a 32位版本(推荐使用) +uint32_t hash_fnv1a_32(const std::string& str); + +// FNV-1a 64位版本 +uint64_t hash_fnv1a_64(const std::string& str); + +// Jenkins one_at_a_time 哈希(简单但高质量) +uint32_t hash_one_at_a_time(const std::string& str); + +// Jenkins lookup3 哈希(更复杂的Jenkins哈希) +uint32_t hash_lookup3(const void* key, size_t length, uint32_t initval = 0); + +// MurmurHash3 32位版本 +uint32_t hash_murmur3_32(const void* key, size_t len, uint32_t seed = 0); + +uint64_t hash_murmur3_64(const void* key, size_t len, uint64_t seed); + +uint64_t hash_murmur3_64(const std::string& str, uint64_t seed = 0); + +// Thomas Wang的整数哈希函数 +uint32_t hash_thomas_wang(uint32_t key); + +// 乘法哈希(适用于哈希表) +uint32_t hash_multiplication(uint32_t key, uint32_t table_size); + +// 除法哈希 +uint32_t hash_division(uint32_t key, uint32_t table_size); + +} // namespace cutl \ No newline at end of file diff --git a/src/common_util/hash.cpp b/src/common_util/hash.cpp new file mode 100644 index 0000000..8a1403c --- /dev/null +++ b/src/common_util/hash.cpp @@ -0,0 +1,343 @@ +#include "hash.h" +#include +#include +#include + +namespace cutl +{ + +// 多项式滚动哈希: Polynomial rolling_hash +uint32_t hash_polynomial_rolling(const std::string& str) +{ + int h = 0; + for (int i = 0; i < str.length(); i++) + { + h = 31 * h + str[i]; + } + return h; +} + +// DJB2哈希算法 +uint32_t hash_djb2(const std::string& str) +{ + // 5381: 经过大量测试选择的质数 + uint32_t djb2 = 5381; + for (char c : str) + { + djb2 = ((djb2 << 5) + djb2) + c; // DJB2 * 33 + c + } + return djb2; +} + +static const uint32_t FNV_OFFSET_BASIS = 2166136261u; +static const uint32_t FNV_PRIME = 16777619u; + +// FNV-1 32位版本 +uint32_t hash_fnv1_32(const std::string& str) +{ + uint32_t hash = FNV_OFFSET_BASIS; + for (char c : str) + { + hash = (hash * FNV_PRIME) ^ c; + } + return hash; +} + +// FNV-1a 32位版本(推荐使用) +uint32_t hash_fnv1a_32(const std::string& str) +{ + uint32_t hash = FNV_OFFSET_BASIS; + for (char c : str) + { + hash = (hash ^ 
c) * FNV_PRIME; + } + return hash; +} + +static const uint64_t FNV_OFFSET_BASIS_64 = 14695981039346656037u; +static const uint64_t FNV_PRIME_64 = 1099511628211u; + +// FNV-1a 64位版本 +uint64_t hash_fnv1a_64(const std::string& str) +{ + uint64_t hash = FNV_OFFSET_BASIS_64; + for (char c : str) + { + hash = (hash ^ static_cast(c)) * FNV_PRIME_64; + } + return hash; +} + +// Jenkins one_at_a_time 哈希(简单但高质量) +uint32_t hash_one_at_a_time(const std::string& str) +{ + uint32_t hash = 0; + + for (char c : str) + { + hash += static_cast(c); + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash; +} + +// Jenkins lookup3 哈希(更复杂的Jenkins哈希) +uint32_t hash_lookup3(const void* key, size_t length, uint32_t initval) +{ + uint32_t a, b, c; + const uint8_t* k = static_cast(key); + + // 设置初始值 + a = b = c = 0xdeadbeef + static_cast(length) + initval; + + // 主要哈希循环 + while (length > 12) + { + a += k[0] + (static_cast(k[1]) << 8) + (static_cast(k[2]) << 16) + + (static_cast(k[3]) << 24); + b += k[4] + (static_cast(k[5]) << 8) + (static_cast(k[6]) << 16) + + (static_cast(k[7]) << 24); + c += k[8] + (static_cast(k[9]) << 8) + (static_cast(k[10]) << 16) + + (static_cast(k[11]) << 24); + + // 混合函数 + a -= c; + a ^= ((c << 4) | (c >> 28)); + c += b; + b -= a; + b ^= ((a << 6) | (a >> 26)); + a += c; + c -= b; + c ^= ((b << 8) | (b >> 24)); + b += a; + a -= c; + a ^= ((c << 16) | (c >> 16)); + c += b; + b -= a; + b ^= ((a << 19) | (a >> 13)); + a += c; + c -= b; + c ^= ((b << 4) | (b >> 28)); + b += a; + + k += 12; + length -= 12; + } + + // 处理最后1-12个字节 + switch (length) + { + case 12: + c += (static_cast(k[11]) << 24); + [[fallthrough]]; + case 11: + c += (static_cast(k[10]) << 16); + [[fallthrough]]; + case 10: + c += (static_cast(k[9]) << 8); + [[fallthrough]]; + case 9: + c += k[8]; + [[fallthrough]]; + case 8: + b += (static_cast(k[7]) << 24); + [[fallthrough]]; + case 7: + b += (static_cast(k[6]) << 16); + 
[[fallthrough]]; + case 6: + b += (static_cast(k[5]) << 8); + [[fallthrough]]; + case 5: + b += k[4]; + [[fallthrough]]; + case 4: + a += (static_cast(k[3]) << 24); + [[fallthrough]]; + case 3: + a += (static_cast(k[2]) << 16); + [[fallthrough]]; + case 2: + a += (static_cast(k[1]) << 8); + [[fallthrough]]; + case 1: + a += k[0]; + break; + case 0: + return c; + } + + // 最终混合 + c ^= b; + c -= ((b << 14) | (b >> 18)); + a ^= c; + a -= ((c << 11) | (c >> 21)); + b ^= a; + b -= ((a << 25) | (a >> 7)); + c ^= b; + c -= ((b << 16) | (b >> 16)); + a ^= c; + a -= ((c << 4) | (c >> 28)); + b ^= a; + b -= ((a << 14) | (a >> 18)); + c ^= b; + c -= ((b << 24) | (b >> 8)); + + return c; +} + +// MurmurHash3 32位版本 +uint32_t hash_murmur3_32(const void* key, size_t len, uint32_t seed) +{ + const uint8_t* data = static_cast(key); + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + // 主体处理 + const uint32_t* blocks = reinterpret_cast(data + nblocks * 4); + for (int i = -nblocks; i; i++) + { + uint32_t k1 = blocks[i]; + + k1 *= c1; + k1 = (k1 << 15) | (k1 >> 17); + k1 *= c2; + + h1 ^= k1; + h1 = (h1 << 13) | (h1 >> 19); + h1 = h1 * 5 + 0xe6546b64; + } + + // 尾部处理 + const uint8_t* tail = data + nblocks * 4; + uint32_t k1 = 0; + + switch (len & 3) + { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = (k1 << 15) | (k1 >> 17); + k1 *= c2; + h1 ^= k1; + } + + // 最终混合 + h1 ^= len; + h1 ^= h1 >> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >> 16; + + return h1; +} + +uint64_t hash_murmur3_64(const void* key, size_t len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995ULL; + const int r = 47; + + const uint8_t* data = static_cast(key); + const uint8_t* end = data + (len / 8) * 8; + + uint64_t h = seed ^ (len * m); + + // 处理8字节块 + while (data != end) + { + uint64_t k; + 
std::memcpy(&k, data, sizeof(k)); + data += sizeof(k); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + // 处理尾部字节 (0-7字节) + switch (len & 7) + { + case 7: + h ^= static_cast(data[6]) << 48; + [[fallthrough]]; + case 6: + h ^= static_cast(data[5]) << 40; + [[fallthrough]]; + case 5: + h ^= static_cast(data[4]) << 32; + [[fallthrough]]; + case 4: + h ^= static_cast(data[3]) << 24; + [[fallthrough]]; + case 3: + h ^= static_cast(data[2]) << 16; + [[fallthrough]]; + case 2: + h ^= static_cast(data[1]) << 8; + [[fallthrough]]; + case 1: + h ^= static_cast(data[0]); + h *= m; + } + + // 最终混合 + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +uint64_t hash_murmur3_64(const std::string& str, uint64_t seed) +{ + return hash_murmur3_64(str.data(), str.length(), seed); +} + +// Thomas Wang的整数哈希函数 +uint32_t hash_thomas_wang(uint32_t key) +{ + key = ~key + (key << 15); + key = key ^ (key >> 12); + key = key + (key << 2); + key = key ^ (key >> 4); + key = key * 2057; + key = key ^ (key >> 16); + return key; +} + +// 乘法哈希(适用于哈希表) +uint32_t hash_multiplication(uint32_t key, uint32_t table_size) +{ + const double A = 0.6180339887; // 黄金比例的分数部分 + double product = key * A; + double fractional = product - static_cast(product); + return static_cast(table_size * fractional); +} + +// 除法哈希 +uint32_t hash_division(uint32_t key, uint32_t table_size) +{ + return key % table_size; +} + +} // namespace cutl diff --git a/src/usage_demo/hash.hpp b/src/usage_demo/hash.hpp new file mode 100644 index 0000000..5cfc44d --- /dev/null +++ b/src/usage_demo/hash.hpp @@ -0,0 +1,124 @@ +#include "common.hpp" +#include "common_util/hash.h" + +void TestPolynomialRolling() +{ + PrintSubTitle("polynomial rolling hash"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + std::cout << str1 << " --> " << cutl::hash_polynomial_rolling(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_polynomial_rolling(str2) << std::endl; +} + +void TestDJB2Hash() +{ + 
PrintSubTitle("DJB2 hash"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + std::cout << str1 << " --> " << cutl::hash_djb2(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_djb2(str2) << std::endl; +} + +void TestFNV1Hash() +{ + PrintSubTitle("FNV1 hash"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + + std::cout << "fnv1_32:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_fnv1_32(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_fnv1_32(str2) << std::endl; + + std::cout << "fnv1a_32:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_fnv1a_32(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_fnv1a_32(str2) << std::endl; + + std::cout << "fnv1a_64:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_fnv1a_64(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_fnv1a_64(str2) << std::endl; +} + +void TestJenkinsHash() +{ + PrintSubTitle("Jenkins hash"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + + std::cout << "one_at_a_time:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_one_at_a_time(str1) << std::endl; + std::cout << str2 << " --> " << cutl::hash_one_at_a_time(str2) << std::endl; + + std::cout << "lookup3:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_lookup3(str1.c_str(), str1.length()) << std::endl; + std::cout << str2 << " --> " << cutl::hash_lookup3(str2.c_str(), str2.length()) << std::endl; +} + +void TestMurmurHash() +{ + PrintSubTitle("murmur hash"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + + std::cout << "murmur3_32:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_murmur3_32(str1.c_str(), str1.length()) + << std::endl; + std::cout << str2 << " --> " << cutl::hash_murmur3_32(str2.c_str(), str2.length()) + << std::endl; + + std::cout << "murmur3_64:" << std::endl; + std::cout << str1 << " --> " << cutl::hash_murmur3_64(str1.c_str(), 
str1.length()) + << std::endl; + std::cout << str2 << " --> " << cutl::hash_murmur3_64(str2.c_str(), str2.length()) + << std::endl; +} + +void TestIntHash() +{ + PrintSubTitle("int hash"); + + uint32_t a = 6; + uint32_t b = 127; + + std::cout << "thomas_wang:" << std::endl; + std::cout << a << " --> " << cutl::hash_thomas_wang(a) << std::endl; + std::cout << b << " --> " << cutl::hash_thomas_wang(b) << std::endl; + + std::cout << "multiplicative_hash:" << std::endl; + std::cout << a << " --> " << cutl::hash_multiplication(a, 20) << std::endl; + std::cout << b << " --> " << cutl::hash_multiplication(b, 20) << std::endl; + + std::cout << "division_hash:" << std::endl; + std::cout << a << " --> " << cutl::hash_division(a, 20) << std::endl; + std::cout << b << " --> " << cutl::hash_division(b, 20) << std::endl; +} + +void TestStdHash() +{ + PrintSubTitle("std::hash()"); + + std::string str1("Hello World!"); + std::string str2("我爱中国!"); + + std::cout << "std::hash():" << std::endl; + std::hash hasher; + std::cout << str1 << " --> " << hasher(str1.c_str()) << std::endl; + std::cout << str2 << " --> " << hasher(str2.c_str()) << std::endl; +} + +void TestHash() +{ + PrintTitle("TestHash"); + + TestPolynomialRolling(); + TestDJB2Hash(); + TestFNV1Hash(); + TestJenkinsHash(); + TestMurmurHash(); + TestIntHash(); + TestStdHash(); +} diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 5ec8c3c..8dcec15 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -7,6 +7,7 @@ #include "eventloop.hpp" #include "filepath.hpp" #include "fileutil.hpp" +#include "hash.hpp" #include "lrucache.hpp" #include "print.hpp" #include "singleton.hpp" @@ -55,6 +56,7 @@ int main(int argc, char* argv[]) // TestThreadPool(); // TestAlgorithmUtil(); BitmapTest(); + TestHash(); // usage_demo(); -- Gitee From d0811967cfddeca68d92d0f6bd21275693cd6ec1 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 20:59:31 +0800 Subject: [PATCH 30/40] fix: hash --- hash.cpp | 
447 ------------------------------------------------------- 1 file changed, 447 deletions(-) delete mode 100644 hash.cpp diff --git a/hash.cpp b/hash.cpp deleted file mode 100644 index aa3034c..0000000 --- a/hash.cpp +++ /dev/null @@ -1,447 +0,0 @@ -#include -#include -#include -#include - -// 多项式滚动哈希: Polynomial rolling_hash -uint32_t polynomial_rolling(const std::string& str) -{ - int h = 0; - for (int i = 0; i < str.length(); i++) - { - h = 31 * h + str[i]; - } - return h; -} - -// DJB2哈希算法 -uint32_t DJB2(const std::string& str) -{ - // 5381: 经过大量测试选择的质数 - uint32_t djb2 = 5381; - for (char c : str) - { - djb2 = ((djb2 << 5) + djb2) + c; // DJB2 * 33 + c - } - return djb2; -} - -static const uint32_t FNV_OFFSET_BASIS = 2166136261u; -static const uint32_t FNV_PRIME = 16777619u; - -// FNV-1 32位版本 -uint32_t fnv1_32(const std::string& str) -{ - uint32_t hash = FNV_OFFSET_BASIS; - for (char c : str) - { - hash = (hash * FNV_PRIME) ^ c; - } - return hash; -} - -// FNV-1a 32位版本(推荐使用) -uint32_t fnv1a_32(const std::string& str) -{ - uint32_t hash = FNV_OFFSET_BASIS; - for (char c : str) - { - hash = (hash ^ c) * FNV_PRIME; - } - return hash; -} - -static const uint64_t FNV_OFFSET_BASIS_64 = 14695981039346656037u; -static const uint64_t FNV_PRIME_64 = 1099511628211u; - -// FNV-1a 64位版本 -uint64_t fnv1a_64(const std::string& str) -{ - uint64_t hash = FNV_OFFSET_BASIS_64; - for (char c : str) - { - hash = (hash ^ static_cast(c)) * FNV_PRIME_64; - } - return hash; -} - -// Jenkins one_at_a_time 哈希(简单但高质量) -uint32_t one_at_a_time(const std::string& str) -{ - uint32_t hash = 0; - - for (char c : str) - { - hash += static_cast(c); - hash += (hash << 10); - hash ^= (hash >> 6); - } - - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - - return hash; -} - -// Jenkins lookup3 哈希(更复杂的Jenkins哈希) -uint32_t lookup3(const void* key, size_t length, uint32_t initval = 0) -{ - uint32_t a, b, c; - const uint8_t* k = static_cast(key); - - // 设置初始值 - a = b = c = 0xdeadbeef 
+ static_cast(length) + initval; - - // 主要哈希循环 - while (length > 12) - { - a += k[0] + (static_cast(k[1]) << 8) + (static_cast(k[2]) << 16) + - (static_cast(k[3]) << 24); - b += k[4] + (static_cast(k[5]) << 8) + (static_cast(k[6]) << 16) + - (static_cast(k[7]) << 24); - c += k[8] + (static_cast(k[9]) << 8) + (static_cast(k[10]) << 16) + - (static_cast(k[11]) << 24); - - // 混合函数 - a -= c; - a ^= ((c << 4) | (c >> 28)); - c += b; - b -= a; - b ^= ((a << 6) | (a >> 26)); - a += c; - c -= b; - c ^= ((b << 8) | (b >> 24)); - b += a; - a -= c; - a ^= ((c << 16) | (c >> 16)); - c += b; - b -= a; - b ^= ((a << 19) | (a >> 13)); - a += c; - c -= b; - c ^= ((b << 4) | (b >> 28)); - b += a; - - k += 12; - length -= 12; - } - - // 处理最后1-12个字节 - switch (length) - { - case 12: - c += (static_cast(k[11]) << 24); - [[fallthrough]]; - case 11: - c += (static_cast(k[10]) << 16); - [[fallthrough]]; - case 10: - c += (static_cast(k[9]) << 8); - [[fallthrough]]; - case 9: - c += k[8]; - [[fallthrough]]; - case 8: - b += (static_cast(k[7]) << 24); - [[fallthrough]]; - case 7: - b += (static_cast(k[6]) << 16); - [[fallthrough]]; - case 6: - b += (static_cast(k[5]) << 8); - [[fallthrough]]; - case 5: - b += k[4]; - [[fallthrough]]; - case 4: - a += (static_cast(k[3]) << 24); - [[fallthrough]]; - case 3: - a += (static_cast(k[2]) << 16); - [[fallthrough]]; - case 2: - a += (static_cast(k[1]) << 8); - [[fallthrough]]; - case 1: - a += k[0]; - break; - case 0: - return c; - } - - // 最终混合 - c ^= b; - c -= ((b << 14) | (b >> 18)); - a ^= c; - a -= ((c << 11) | (c >> 21)); - b ^= a; - b -= ((a << 25) | (a >> 7)); - c ^= b; - c -= ((b << 16) | (b >> 16)); - a ^= c; - a -= ((c << 4) | (c >> 28)); - b ^= a; - b -= ((a << 14) | (a >> 18)); - c ^= b; - c -= ((b << 24) | (b >> 8)); - - return c; -} - -// MurmurHash3 32位版本 -uint32_t murmur3_32(const void* key, size_t len, uint32_t seed = 0) -{ - const uint8_t* data = static_cast(key); - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const 
uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - // 主体处理 - const uint32_t* blocks = reinterpret_cast(data + nblocks * 4); - for (int i = -nblocks; i; i++) - { - uint32_t k1 = blocks[i]; - - k1 *= c1; - k1 = (k1 << 15) | (k1 >> 17); - k1 *= c2; - - h1 ^= k1; - h1 = (h1 << 13) | (h1 >> 19); - h1 = h1 * 5 + 0xe6546b64; - } - - // 尾部处理 - const uint8_t* tail = data + nblocks * 4; - uint32_t k1 = 0; - - switch (len & 3) - { - case 3: - k1 ^= tail[2] << 16; - [[fallthrough]]; - case 2: - k1 ^= tail[1] << 8; - [[fallthrough]]; - case 1: - k1 ^= tail[0]; - k1 *= c1; - k1 = (k1 << 15) | (k1 >> 17); - k1 *= c2; - h1 ^= k1; - } - - // 最终混合 - h1 ^= len; - h1 ^= h1 >> 16; - h1 *= 0x85ebca6b; - h1 ^= h1 >> 13; - h1 *= 0xc2b2ae35; - h1 ^= h1 >> 16; - - return h1; -} - -uint64_t murmur3_64(const void* key, size_t len, uint64_t seed) -{ - const uint64_t m = 0xc6a4a7935bd1e995ULL; - const int r = 47; - - const uint8_t* data = static_cast(key); - const uint8_t* end = data + (len / 8) * 8; - - uint64_t h = seed ^ (len * m); - - // 处理8字节块 - while (data != end) - { - uint64_t k; - std::memcpy(&k, data, sizeof(k)); - data += sizeof(k); - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - // 处理尾部字节 (0-7字节) - switch (len & 7) - { - case 7: - h ^= static_cast(data[6]) << 48; - [[fallthrough]]; - case 6: - h ^= static_cast(data[5]) << 40; - [[fallthrough]]; - case 5: - h ^= static_cast(data[4]) << 32; - [[fallthrough]]; - case 4: - h ^= static_cast(data[3]) << 24; - [[fallthrough]]; - case 3: - h ^= static_cast(data[2]) << 16; - [[fallthrough]]; - case 2: - h ^= static_cast(data[1]) << 8; - [[fallthrough]]; - case 1: - h ^= static_cast(data[0]); - h *= m; - } - - // 最终混合 - h ^= h >> r; - h *= m; - h ^= h >> r; - - return h; -} - -uint64_t murmur3_64(const std::string& str, uint64_t seed = 0) -{ - return murmur3_64(str.data(), str.length(), seed); -} - -// Thomas Wang的整数哈希函数 -uint32_t thomas_wang(uint32_t key) -{ - key = ~key + (key << 15); - key = key ^ (key >> 
12); - key = key + (key << 2); - key = key ^ (key >> 4); - key = key * 2057; - key = key ^ (key >> 16); - return key; -} - -// 乘法哈希(适用于哈希表) -uint32_t multiplicative_hash(uint32_t key, uint32_t table_size) -{ - const double A = 0.6180339887; // 黄金比例的分数部分 - double product = key * A; - double fractional = product - static_cast(product); - return static_cast(table_size * fractional); -} - -// 除法哈希 -uint32_t division_hash(uint32_t key, uint32_t table_size) -{ - return key % table_size; -} - -void TestPolynomialRolling() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - std::cout << str1 << " --> " << polynomial_rolling(str1) << std::endl; - std::cout << str2 << " --> " << polynomial_rolling(str2) << std::endl; -} - -void TestDJB2() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - std::cout << str1 << " --> " << DJB2(str1) << std::endl; - std::cout << str2 << " --> " << DJB2(str2) << std::endl; -} - -void TestFNV1() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - - std::cout << "fnv1_32:" << std::endl; - std::cout << str1 << " --> " << fnv1_32(str1) << std::endl; - std::cout << str2 << " --> " << fnv1_32(str2) << std::endl; - - std::cout << "fnv1a_32:" << std::endl; - std::cout << str1 << " --> " << fnv1a_32(str1) << std::endl; - std::cout << str2 << " --> " << fnv1a_32(str2) << std::endl; - - std::cout << "fnv1a_64:" << std::endl; - std::cout << str1 << " --> " << fnv1a_64(str1) << std::endl; - std::cout << str2 << " --> " << fnv1a_64(str2) << std::endl; -} - -void TestJenkins() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - - std::cout << "one_at_a_time:" << std::endl; - std::cout << str1 << " --> " << one_at_a_time(str1) << std::endl; - std::cout << str2 << " --> " << one_at_a_time(str2) << std::endl; - - std::cout << "lookup3:" << std::endl; - std::cout << str1 << " --> " << lookup3(str1.c_str(), str1.length()) << std::endl; - std::cout << str2 << " --> " << lookup3(str2.c_str(), 
str2.length()) << std::endl; -} - -void TestMurmurHash() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - - std::cout << "murmur3_32:" << std::endl; - std::cout << str1 << " --> " << murmur3_32(str1.c_str(), str1.length()) << std::endl; - std::cout << str2 << " --> " << murmur3_32(str2.c_str(), str2.length()) << std::endl; - - std::cout << "murmur3_64:" << std::endl; - std::cout << str1 << " --> " << murmur3_64(str1.c_str(), str1.length()) << std::endl; - std::cout << str2 << " --> " << murmur3_64(str2.c_str(), str2.length()) << std::endl; -} - -void TestIntHash() -{ - uint32_t a = 6; - uint32_t b = 127; - - std::cout << "thomas_wang:" << std::endl; - std::cout << a << " --> " << thomas_wang(a) << std::endl; - std::cout << b << " --> " << thomas_wang(b) << std::endl; - - std::cout << "multiplicative_hash:" << std::endl; - std::cout << a << " --> " << multiplicative_hash(a, 20) << std::endl; - std::cout << b << " --> " << multiplicative_hash(b, 20) << std::endl; - - std::cout << "division_hash:" << std::endl; - std::cout << a << " --> " << division_hash(a, 20) << std::endl; - std::cout << b << " --> " << division_hash(b, 20) << std::endl; -} - -void TestStdHash() -{ - std::string str1("Hello World!"); - std::string str2("我爱中国!"); - - std::cout << "std::hash():" << std::endl; - std::hash hasher; - std::cout << str1 << " --> " << hasher(str1.c_str()) << std::endl; - std::cout << str2 << " --> " << hasher(str2.c_str()) << std::endl; -} - -int main() -{ - TestPolynomialRolling(); - TestDJB2(); - TestFNV1(); - TestJenkins(); - TestMurmurHash(); - TestIntHash(); - TestStdHash(); - - return 0; -} - -// macOS clang version 16.0.0 -// std::hash(): -// Hello World! --> 18476351241006313 -// 我爱中国! 
--> 16995802077979883537 \ No newline at end of file -- Gitee From 42ea8e1eed7e021f8a7beb8704a688e08060e7d9 Mon Sep 17 00:00:00 2001 From: Spencer Date: Wed, 15 Oct 2025 21:04:28 +0800 Subject: [PATCH 31/40] feat: add comment for hash.h --- include/common_util/hash.h | 157 ++++++++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 12 deletions(-) diff --git a/include/common_util/hash.h b/include/common_util/hash.h index 77609ad..cede426 100644 --- a/include/common_util/hash.h +++ b/include/common_util/hash.h @@ -1,4 +1,23 @@ -#pragma once +/** + * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. + * + * @file hash.h + * @brief Commonly used hash functions, such as DJB2/FNV-1/lookup3 + * @author Spencer + * @date 2025-10-15 + */ +#pragma once #include #include @@ -6,41 +25,155 @@ namespace cutl { -// 多项式滚动哈希: Polynomial rolling_hash +/** + * @brief Polynomial rolling hash function + * + * A simple hash function that processes each character in sequence, + * using a rolling polynomial calculation to generate a hash value. + * + * @param str The input string to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_polynomial_rolling(const std::string& str); -// DJB2哈希算法 +/** + * @brief DJB2 hash algorithm + * + * A popular non-cryptographic hash function created by Daniel J. Bernstein. + * Known for its simplicity and good distribution properties. 
+ * + * @param str The input string to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_djb2(const std::string& str); -// FNV-1 32位版本 +/** + * @brief FNV-1 hash function (32-bit version) + * + * Fowler-Noll-Vo hash function, version 1. A non-cryptographic hash function + * with good avalanche properties and distribution. + * + * @param str The input string to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_fnv1_32(const std::string& str); -// FNV-1a 32位版本(推荐使用) +/** + * @brief FNV-1a hash function (32-bit version, recommended) + * + * Improved version of FNV-1 with better avalanche properties by changing + * the order of XOR and multiplication operations. + * + * @param str The input string to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_fnv1a_32(const std::string& str); -// FNV-1a 64位版本 +/** + * @brief FNV-1a hash function (64-bit version) + * + * 64-bit implementation of the improved FNV-1a hash function, providing + * a larger hash space for reduced collision probability. + * + * @param str The input string to hash + * @return uint64_t The resulting hash value + */ uint64_t hash_fnv1a_64(const std::string& str); -// Jenkins one_at_a_time 哈希(简单但高质量) +/** + * @brief Jenkins one_at_a_time hash function + * + * A simple but high-quality hash function designed by Bob Jenkins. + * Processes each byte individually and produces good distribution. + * + * @param str The input string to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_one_at_a_time(const std::string& str); -// Jenkins lookup3 哈希(更复杂的Jenkins哈希) +/** + * @brief Jenkins lookup3 hash function + * + * A more complex hash function by Bob Jenkins, offering high performance + * and excellent avalanche properties for general-purpose hashing. 
+ * + * @param key Pointer to the data to hash + * @param length Length of the data in bytes + * @param initval Initial value for the hash calculation + * @return uint32_t The resulting hash value + */ uint32_t hash_lookup3(const void* key, size_t length, uint32_t initval = 0); -// MurmurHash3 32位版本 +/** + * @brief MurmurHash3 hash function (32-bit version) + * + * A fast non-cryptographic hash function created by Austin Appleby. + * Known for its excellent performance and distribution characteristics. + * + * @param key Pointer to the data to hash + * @param len Length of the data in bytes + * @param seed Initial seed value for hash calculation + * @return uint32_t The resulting hash value + */ uint32_t hash_murmur3_32(const void* key, size_t len, uint32_t seed = 0); +/** + * @brief MurmurHash3 hash function (64-bit version) + * + * 64-bit implementation of MurmurHash3, providing a larger hash space + * for applications requiring reduced collision probability. + * + * @param key Pointer to the data to hash + * @param len Length of the data in bytes + * @param seed Initial seed value for hash calculation + * @return uint64_t The resulting hash value + */ uint64_t hash_murmur3_64(const void* key, size_t len, uint64_t seed); +/** + * @brief MurmurHash3 hash function (64-bit version for strings) + * + * Convenience overload of the 64-bit MurmurHash3 function for string inputs. + * + * @param str The input string to hash + * @param seed Initial seed value for hash calculation + * @return uint64_t The resulting hash value + */ uint64_t hash_murmur3_64(const std::string& str, uint64_t seed = 0); -// Thomas Wang的整数哈希函数 +/** + * @brief Thomas Wang's integer hash function + * + * A hash function designed specifically for 32-bit integers, providing + * good avalanche properties and minimal collisions. 
+ * + * @param key The 32-bit integer to hash + * @return uint32_t The resulting hash value + */ uint32_t hash_thomas_wang(uint32_t key); -// 乘法哈希(适用于哈希表) +/** + * @brief Multiplication hash function + * + * A hash function suitable for hash tables that uses multiplication by a + * large prime number to distribute keys across the table. + * + * @param key The integer key to hash + * @param table_size The size of the hash table + * @return uint32_t The resulting hash value (table index) + */ uint32_t hash_multiplication(uint32_t key, uint32_t table_size); -// 除法哈希 +/** + * @brief Division hash function + * + * A simple hash function for hash tables that uses modulo operation to + * map keys to table indices. + * + * @param key The integer key to hash + * @param table_size The size of the hash table + * @return uint32_t The resulting hash value (table index) + */ uint32_t hash_division(uint32_t key, uint32_t table_size); } // namespace cutl \ No newline at end of file -- Gitee From 64a53af5327f0272630b75eb31f161d43fdb64d8 Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 16 Oct 2025 21:18:38 +0800 Subject: [PATCH 32/40] feat: add bloomfilter.h --- include/common_util/bloomfilter.h | 113 +++++++++++++++ src/common_util/bloomfilter.cpp | 233 ++++++++++++++++++++++++++++++ src/usage_demo/bloomfilter.hpp | 76 ++++++++++ src/usage_demo/main.cpp | 6 +- 4 files changed, 426 insertions(+), 2 deletions(-) create mode 100644 include/common_util/bloomfilter.h create mode 100644 src/common_util/bloomfilter.cpp create mode 100644 src/usage_demo/bloomfilter.hpp diff --git a/include/common_util/bloomfilter.h b/include/common_util/bloomfilter.h new file mode 100644 index 0000000..22e563f --- /dev/null +++ b/include/common_util/bloomfilter.h @@ -0,0 +1,113 @@ +#pragma once + +#include "bitmap.h" +#include +#include +#include +#include +#include + +namespace cutl +{ + +// 误判率 +enum bloom_error_rate +{ + percent_01 = 1, + percent_10 = 10, + percent_20 = 20, + percent_30 = 30, + 
percent_40 = 40, + percent_50 = 50, +}; + +class bloom_filter +{ +private: + size_t size_; + std::shared_ptr bitmap_; + size_t hash_size_; + +public: + bloom_filter(size_t size, size_t hash_size); + + bloom_filter(size_t expected_size, bloom_error_rate error_rate = bloom_error_rate::percent_01); + + ~bloom_filter() = default; + +public: + /** + * 添加元素 + */ + void add(const std::string& value); + + /** + * 判断元素是否存在(可能有误判) + */ + bool contains(const std::string& value) const; + + /** + * 清空布隆过滤器 + */ + void clear() { bitmap_->reset(); } + + /** + * 获取位图中设置位的数量(测试函数,实际业务中不建议使用) + */ + size_t test_getSetBitCount() const { return bitmap_->count(); } + + // 估算元素数量(测试函数,实际业务中不建议使用) + size_t test_estimateCount() const; + + // 获取相对误差(测试函数,实际业务中不建议使用) + double test_getRelativeError() { return 1.04 / std::sqrt(size_); } + +private: + // 双重哈希函数,生成k个哈希值 + std::pair hash(const std::string& str) const; +}; + +/** + * @brief 计算最优哈希函数数量 + * + * @param m 位数组大小(比特数) + * @param n 预期插入元素数量 + * @return size_t + */ +size_t bloom_optimal_k(size_t m, size_t n); + +/** + * @brief 计算最优位数组大小 + * + * @param n 预期插入元素数量 + * @param p 误判率 + * @return size_t + */ +size_t bloom_optimal_m(size_t n, double p); + +/** + * @brief 计算实际误判率 + * + * @param m 位数组大小(比特数) + * @param n 预期插入元素数量 + * @param k 哈希函数数量 + * @return double + */ +double bloom_false_positive_rate(size_t m, size_t n, size_t k); + +/** + * @brief 综合计算所有最优参数 + * + * @param n 预期的元素数量 + * @param target_fp_rate 目标误判率 + * @param optimal_m [out] 最优位数组大小 + * @param optimal_k [out] 最优哈西函数数量 + * @param actual_fp_rate [out] 实际误判率 + */ +void bloom_optimal_parameters(size_t n, + double target_fp_rate, + size_t& optimal_m, + size_t& optimal_k, + double& actual_fp_rate); + +} // namespace cutl \ No newline at end of file diff --git a/src/common_util/bloomfilter.cpp b/src/common_util/bloomfilter.cpp new file mode 100644 index 0000000..f2426dd --- /dev/null +++ b/src/common_util/bloomfilter.cpp @@ -0,0 +1,233 @@ +#include "bloomfilter.h" + +namespace 
cutl +{ + +bloom_filter::bloom_filter(size_t size, size_t hash_size) + : size_(size) + , hash_size_(hash_size) + , bitmap_(std::make_shared(size_)) +{ +} + +bloom_filter::bloom_filter(size_t expected_size, bloom_error_rate error_rate) +{ + size_t optimal_m = 0; + size_t optimal_k = 0; + double actual_fp_rate = 0.0; + bloom_optimal_parameters( + expected_size, (error_rate / 100.0f), optimal_m, optimal_k, actual_fp_rate); + + size_ = optimal_m; + hash_size_ = optimal_k; + bitmap_ = std::make_shared(size_); +} + +/** + * 添加元素 + */ +void bloom_filter::add(const std::string& value) +{ + auto pair = hash(value); + auto h1 = pair.first; + auto h2 = pair.second; + // 确保h2是奇数,提高分布性 + if (h2 & 0x1 == 0) // 等同于 h2 % 2 == 0 + { + h2 += 1; + } + + for (size_t i = 0; i < hash_size_; ++i) + { + size_t index = (h1 + i * h2) % size_; + bitmap_->set(index); + } +} + +/** + * 判断元素是否存在(可能有误判) + */ +bool bloom_filter::contains(const std::string& value) const +{ + auto pair = hash(value); + auto h1 = pair.first; + auto h2 = pair.second; + if (h2 & 0x1 == 0) // 等同于 h2 % 2 == 0 + { + h2 += 1; + } + + for (size_t i = 0; i < hash_size_; ++i) + { + size_t index = (h1 + i * h2) % size_; + if (!bitmap_->get(index)) + { + return false; + } + } + return true; +} + +// 估算元素数量(测试函数,实际业务中不建议使用) +size_t bloom_filter::test_estimateCount() const +{ + // double m = size_; + // double k = hash_size_; + // double x = getSetBitCount(); + // return std::round(-(m / k) * std::log(1 - x / m)); + + size_t set_bits = test_getSetBitCount(); + if (set_bits == size_) + { + // 所有位都被设置,估算不可靠 + return std::numeric_limits::max(); + } + + double x = static_cast(set_bits) / size_; + return std::round(-static_cast(size_) / hash_size_ * std::log(1 - x)); +} + +// 双重哈希函数,生成k个哈希值 +std::pair bloom_filter::hash(const std::string& str) const +{ + std::hash hasher1; + std::hash hasher2; + + size_t h1 = hasher1(str); + size_t h2 = hasher2(str + "salt"); // 加盐获得不同的哈希值 + + return { h1, h2 }; +} + +/** + * @brief 计算最优哈希函数数量 + * + 
* @param m 位数组大小(比特数) + * @param n 预期插入元素数量 + * @return size_t + */ +size_t bloom_optimal_k(size_t m, size_t n) +{ + return static_cast(std::round(static_cast(m) / n * std::log(2))); +} + +/** + * @brief 计算最优位数组大小 + * + * @param n 预期插入元素数量 + * @param p 误判率 + * @return size_t + */ +size_t bloom_optimal_m(size_t n, double p) +{ + return static_cast(std::ceil(-(n * std::log(p)) / std::pow(std::log(2), 2))); +} + +/** + * @brief 计算实际误判率 + * + * @param m 位数组大小(比特数) + * @param n 预期插入元素数量 + * @param k 哈希函数数量 + * @return double + */ +double bloom_false_positive_rate(size_t m, size_t n, size_t k) +{ + return std::pow(1 - std::exp(-static_cast(k) * n / m), k); +} + +/** + * @brief 综合计算所有最优参数 + * + * @param n 预期的元素数量 + * @param target_fp_rate 目标误判率 + * @param optimal_m [out] 最优位数组大小 + * @param optimal_k [out] 最优哈西函数数量 + * @param actual_fp_rate [out] 实际误判率 + */ +void bloom_optimal_parameters(size_t n, + double target_fp_rate, + size_t& optimal_m, + size_t& optimal_k, + double& actual_fp_rate) +{ + optimal_m = bloom_optimal_m(n, target_fp_rate); + optimal_k = bloom_optimal_k(optimal_m, n); + actual_fp_rate = bloom_false_positive_rate(optimal_m, n, optimal_k); +} + +// class OptimizedBloomFilter +// { +// private: +// BloomFilter bf_; +// std::vector bit_array; +// size_t m; // 位数组大小 +// size_t k; // 哈希函数数量 +// DoubleHashingStrategy hasher; + +// public: +// // 自动计算最优参数 +// OptimizedBloomFilter(size_t expected_elements, double target_false_positive_rate = 0.01) +// : hasher(0, 0) +// { // 临时初始化,后面重新设置 + +// size_t optimal_m, optimal_k; +// double actual_fp; + +// bloom_optimal_parameters( +// expected_elements, target_false_positive_rate, optimal_m, optimal_k, actual_fp); +// bf_ = BloomFilter(optimal_m, optimal_k); + +// // 实际使用中建议增加20%的冗余 +// m = static_cast(optimal_m * 1.2); +// k = optimal_k; +// } + +// // 手动指定参数 +// OptimizedBloomFilter(size_t bit_array_size, size_t num_hashes) +// : m(bit_array_size) +// , k(num_hashes) +// , hasher(num_hashes, bit_array_size) +// { +// 
bit_array.resize(m, false); +// } + +// void add(const std::string& element) +// { +// auto hashes = hasher.get_hashes(element); +// for (size_t hash_val : hashes) +// { +// bit_array[hash_val] = true; +// } +// } + +// bool contains(const std::string& element) const +// { +// auto hashes = hasher.get_hashes(element); +// for (size_t hash_val : hashes) +// { +// if (!bit_array[hash_val]) +// { +// return false; +// } +// } +// return true; +// } + +// // 获取实际统计信息 +// void print_stats() const +// { +// size_t set_bits = 0; +// for (bool bit : bit_array) +// { +// if (bit) +// set_bits++; +// } + +// double utilization = static_cast(set_bits) / m; +// std::cout << "Set bits: " << set_bits << "/" << m << " (" << (utilization * 100) << "%)" +// << std::endl; +// } +// }; + +} // namespace cutl \ No newline at end of file diff --git a/src/usage_demo/bloomfilter.hpp b/src/usage_demo/bloomfilter.hpp new file mode 100644 index 0000000..18cd5ae --- /dev/null +++ b/src/usage_demo/bloomfilter.hpp @@ -0,0 +1,76 @@ +#include "common.hpp" +#include "common_util/bloomfilter.h" + +void TestBsetUsage() +{ + PrintSubTitle("TestBsetUsage"); + + // 创建布隆过滤器:位数组大小1000,使用3个哈希函数 + cutl::bloom_filter bloom(1000, 3); + + // 添加一些元素 + bloom.add("apple"); + bloom.add("banana"); + bloom.add("orange"); + + // 测试存在性 + std::cout << "Contains 'apple': " << bloom.contains("apple") << std::endl; // 应该为true + std::cout << "Contains 'banana': " << bloom.contains("banana") << std::endl; // 应该为true + std::cout << "Contains 'grape': " << bloom.contains("grape") + << std::endl; // 可能为false,或小概率为true(误判) +} + +void TestBoolParam() +{ + PrintSubTitle("TestBoolParam"); + + size_t optimal_m = 0; + size_t optimal_k = 0; + double actual_fp_rate = 0.0; + bloom_optimal_parameters( + 512, cutl::bloom_error_rate::percent_01, optimal_m, optimal_k, actual_fp_rate); + + std::cout << "optimal_m: " << optimal_m << std::endl; + std::cout << "optimal_k: " << optimal_k << std::endl; + std::cout << "actual_fp_rate: " << 
actual_fp_rate << std::endl; +} + +// 误差测试 +void TestAccuracy() +{ + PrintSubTitle("TestAccuracy"); + + int actualCount = 1000; + cutl::bloom_filter bloom(actualCount, 3); + + std::cout << "Start test, add " << actualCount << " element..." << std::endl; + // 1. 使用随机设备作为种子 + std::random_device rd; + // 2. 使用 Mersenne Twister 引擎(高质量随机数) + std::mt19937 gen(rd()); + // 3. 定义分布范围 [1, 10000] + std::uniform_int_distribution dist(1, 10000); + for (size_t i = 0; i < actualCount; i++) + { + // 确保生成唯一值 + bloom.add("element - " + std::to_string(dist(gen))); + } + + int estimateCount = bloom.test_estimateCount(); + // int estimateCount = bloom.estimate_cardinality_ml(); + double error = std::abs(estimateCount - actualCount) / (double)actualCount; + std::cout << "actualCount: " << actualCount << ", estimateCount:" << estimateCount + << std::endl; + std::cout << "Actual error: " << std::round(error * 100) << "%" << std::endl; + std::cout << "Theoretical error: " << std::round(bloom.test_getRelativeError() * 100) << "%" + << std::endl; +} + +void TestBloomFilter() +{ + PrintTitle("TestBloomFilter"); + + TestBsetUsage(); + TestBoolParam(); + TestAccuracy(); +} \ No newline at end of file diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 8dcec15..4643bbf 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -1,5 +1,6 @@ #include "algoutil.hpp" #include "bitmap.hpp" +#include "bloomfilter.hpp" #include "common.hpp" #include "config.hpp" #include "datetime.hpp" @@ -55,8 +56,9 @@ int main(int argc, char* argv[]) // TestEventLoop(); // TestThreadPool(); // TestAlgorithmUtil(); - BitmapTest(); - TestHash(); + // BitmapTest(); + // TestHash(); + TestBloomFilter(); // usage_demo(); -- Gitee From 47791d67a2c50216300f82db2d12c9befc0192ba Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 16 Oct 2025 21:21:44 +0800 Subject: [PATCH 33/40] fix: bloomfilter.h --- include/common_util/bitmap.h | 18 +++++++++--------- include/common_util/bloomfilter.h | 10 
+++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/common_util/bitmap.h b/include/common_util/bitmap.h index b6b58fd..8a1498a 100644 --- a/include/common_util/bitmap.h +++ b/include/common_util/bitmap.h @@ -135,10 +135,6 @@ public: */ class bitmap : public ibitmap { -protected: - std::vector bits_; ///< Underlying storage for bits - size_t size_; ///< Total number of bits the bitmap can hold - public: /** * @brief Construct a new bitmap object with specified size @@ -318,6 +314,10 @@ private: * @return std::string Hexadecimal string representation */ std::string to_hex(int compress = 1) const; + +protected: + std::vector bits_; ///< Underlying storage for bits + size_t size_; ///< Total number of bits the bitmap can hold }; /** @@ -394,11 +394,6 @@ private: */ class roaring_bitmap : public ibitmap { -private: - size_t block_size_; ///< Size of each block in bits - std::unordered_map - container_; ///< Storage for blocks (key: block index, value: bitmap block) - public: /** * @brief Construct a new roaring_bitmap object @@ -578,6 +573,11 @@ public: * @throw std::invalid_argument If block sizes differ or blocks are missing */ roaring_bitmap& operator^=(const roaring_bitmap& other); + +private: + size_t block_size_; ///< Size of each block in bits + std::unordered_map + container_; ///< Storage for blocks (key: block index, value: bitmap block) }; } // namespace cutl \ No newline at end of file diff --git a/include/common_util/bloomfilter.h b/include/common_util/bloomfilter.h index 22e563f..3ed4543 100644 --- a/include/common_util/bloomfilter.h +++ b/include/common_util/bloomfilter.h @@ -23,11 +23,6 @@ enum bloom_error_rate class bloom_filter { -private: - size_t size_; - std::shared_ptr bitmap_; - size_t hash_size_; - public: bloom_filter(size_t size, size_t hash_size); @@ -65,6 +60,11 @@ public: private: // 双重哈希函数,生成k个哈希值 std::pair hash(const std::string& str) const; + +private: + size_t size_; + std::shared_ptr bitmap_; + size_t 
hash_size_; }; /** -- Gitee From a7236ae79587a93a272c9abdac932971023b16e1 Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 16 Oct 2025 21:24:34 +0800 Subject: [PATCH 34/40] fix: bloomfilter.h --- include/common_util/bloomfilter.h | 16 ++++----- src/common_util/bloomfilter.cpp | 44 +++++++++++++++--------- src/usage_demo/bloomfilter.hpp | 56 +++++++++++++++---------------- 3 files changed, 64 insertions(+), 52 deletions(-) diff --git a/include/common_util/bloomfilter.h b/include/common_util/bloomfilter.h index 3ed4543..4ff0ac0 100644 --- a/include/common_util/bloomfilter.h +++ b/include/common_util/bloomfilter.h @@ -46,16 +46,16 @@ public: */ void clear() { bitmap_->reset(); } - /** - * 获取位图中设置位的数量(测试函数,实际业务中不建议使用) - */ - size_t test_getSetBitCount() const { return bitmap_->count(); } + // /** + // * 获取位图中设置位的数量(测试函数,实际业务中不建议使用) + // */ + // size_t test_getSetBitCount() const; - // 估算元素数量(测试函数,实际业务中不建议使用) - size_t test_estimateCount() const; + // // 估算元素数量(测试函数,实际业务中不建议使用) + // size_t test_estimateCount() const; - // 获取相对误差(测试函数,实际业务中不建议使用) - double test_getRelativeError() { return 1.04 / std::sqrt(size_); } + // // 获取相对误差(测试函数,实际业务中不建议使用) + // double test_getRelativeError(); private: // 双重哈希函数,生成k个哈希值 diff --git a/src/common_util/bloomfilter.cpp b/src/common_util/bloomfilter.cpp index f2426dd..9a7957d 100644 --- a/src/common_util/bloomfilter.cpp +++ b/src/common_util/bloomfilter.cpp @@ -68,24 +68,36 @@ bool bloom_filter::contains(const std::string& value) const return true; } -// 估算元素数量(测试函数,实际业务中不建议使用) -size_t bloom_filter::test_estimateCount() const -{ - // double m = size_; - // double k = hash_size_; - // double x = getSetBitCount(); - // return std::round(-(m / k) * std::log(1 - x / m)); +// // 获取位图中设置位的数量(测试函数,实际业务中不建议使用) +// size_t bloom_filter::test_getSetBitCount() const +// { +// return bitmap_->count(); +// } - size_t set_bits = test_getSetBitCount(); - if (set_bits == size_) - { - // 所有位都被设置,估算不可靠 - return std::numeric_limits::max(); - } +// // 
估算元素数量(测试函数,实际业务中不建议使用) +// size_t bloom_filter::test_estimateCount() const +// { +// // double m = size_; +// // double k = hash_size_; +// // double x = getSetBitCount(); +// // return std::round(-(m / k) * std::log(1 - x / m)); - double x = static_cast(set_bits) / size_; - return std::round(-static_cast(size_) / hash_size_ * std::log(1 - x)); -} +// size_t set_bits = test_getSetBitCount(); +// if (set_bits == size_) +// { +// // 所有位都被设置,估算不可靠 +// return std::numeric_limits::max(); +// } + +// double x = static_cast(set_bits) / size_; +// return std::round(-static_cast(size_) / hash_size_ * std::log(1 - x)); +// } + +// // 获取相对误差(测试函数,实际业务中不建议使用) +// double bloom_filter::test_getRelativeError() +// { +// return 1.04 / std::sqrt(size_); +// } // 双重哈希函数,生成k个哈希值 std::pair bloom_filter::hash(const std::string& str) const diff --git a/src/usage_demo/bloomfilter.hpp b/src/usage_demo/bloomfilter.hpp index 18cd5ae..7d9ae54 100644 --- a/src/usage_demo/bloomfilter.hpp +++ b/src/usage_demo/bloomfilter.hpp @@ -35,36 +35,36 @@ void TestBoolParam() std::cout << "actual_fp_rate: " << actual_fp_rate << std::endl; } -// 误差测试 -void TestAccuracy() -{ - PrintSubTitle("TestAccuracy"); +// // 误差测试 +// void TestAccuracy() +// { +// PrintSubTitle("TestAccuracy"); - int actualCount = 1000; - cutl::bloom_filter bloom(actualCount, 3); +// int actualCount = 1000; +// cutl::bloom_filter bloom(actualCount, 3); - std::cout << "Start test, add " << actualCount << " element..." << std::endl; - // 1. 使用随机设备作为种子 - std::random_device rd; - // 2. 使用 Mersenne Twister 引擎(高质量随机数) - std::mt19937 gen(rd()); - // 3. 定义分布范围 [1, 10000] - std::uniform_int_distribution dist(1, 10000); - for (size_t i = 0; i < actualCount; i++) - { - // 确保生成唯一值 - bloom.add("element - " + std::to_string(dist(gen))); - } +// std::cout << "Start test, add " << actualCount << " element..." << std::endl; +// // 1. 使用随机设备作为种子 +// std::random_device rd; +// // 2. 使用 Mersenne Twister 引擎(高质量随机数) +// std::mt19937 gen(rd()); +// // 3. 
定义分布范围 [1, 10000] +// std::uniform_int_distribution dist(1, 10000); +// for (size_t i = 0; i < actualCount; i++) +// { +// // 确保生成唯一值 +// bloom.add("element - " + std::to_string(dist(gen))); +// } - int estimateCount = bloom.test_estimateCount(); - // int estimateCount = bloom.estimate_cardinality_ml(); - double error = std::abs(estimateCount - actualCount) / (double)actualCount; - std::cout << "actualCount: " << actualCount << ", estimateCount:" << estimateCount - << std::endl; - std::cout << "Actual error: " << std::round(error * 100) << "%" << std::endl; - std::cout << "Theoretical error: " << std::round(bloom.test_getRelativeError() * 100) << "%" - << std::endl; -} +// int estimateCount = bloom.test_estimateCount(); +// // int estimateCount = bloom.estimate_cardinality_ml(); +// double error = std::abs(estimateCount - actualCount) / (double)actualCount; +// std::cout << "actualCount: " << actualCount << ", estimateCount:" << estimateCount +// << std::endl; +// std::cout << "Actual error: " << std::round(error * 100) << "%" << std::endl; +// std::cout << "Theoretical error: " << std::round(bloom.test_getRelativeError() * 100) << "%" +// << std::endl; +// } void TestBloomFilter() { @@ -72,5 +72,5 @@ void TestBloomFilter() TestBsetUsage(); TestBoolParam(); - TestAccuracy(); + // TestAccuracy(); } \ No newline at end of file -- Gitee From dc639e7c15e0fcda877d44017b493be42e122c54 Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 16 Oct 2025 21:28:51 +0800 Subject: [PATCH 35/40] feat: add comment for bloomfilter.h --- include/common_util/bloomfilter.h | 140 ++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 37 deletions(-) diff --git a/include/common_util/bloomfilter.h b/include/common_util/bloomfilter.h index 4ff0ac0..ead0c45 100644 --- a/include/common_util/bloomfilter.h +++ b/include/common_util/bloomfilter.h @@ -1,4 +1,24 @@ -#pragma once +/** + * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. + * + * @file bloomfilter.h + * @brief bloom_filter algorithm. + * @author Spencer + * @date 2025-10-16 + */ + +#pragma once #include "bitmap.h" #include @@ -10,99 +30,145 @@ namespace cutl { -// 误判率 +/** + * @brief False positive rate enumeration for bloom filter + * + * This enum defines possible false positive rates for the bloom filter + * as percentage values. + */ enum bloom_error_rate { - percent_01 = 1, - percent_10 = 10, - percent_20 = 20, - percent_30 = 30, - percent_40 = 40, - percent_50 = 50, + percent_01 = 1, // <= 1% false positive rate + percent_10 = 10, // <= 10% false positive rate + percent_20 = 20, // <= 20% false positive rate + percent_30 = 30, // <= 30% false positive rate + percent_40 = 40, // <= 40% false positive rate + percent_50 = 50, // <= 50% false positive rate }; +/** + * @brief Bloom filter implementation + * + * A bloom filter is a space-efficient probabilistic data structure that is used + * to test whether an element is a member of a set. It may return false positives + * but never false negatives. 
+ */ class bloom_filter { public: + /** + * @brief Construct a new bloom filter object with specified sizes + * + * @param size Total number of bits in the bitmap + * @param hash_size Number of hash functions to use + */ bloom_filter(size_t size, size_t hash_size); + /** + * @brief Construct a new bloom filter object with automatic parameter calculation + * + * @param expected_size Estimated number of elements to be inserted + * @param error_rate Desired false positive rate (default: 1%) + */ bloom_filter(size_t expected_size, bloom_error_rate error_rate = bloom_error_rate::percent_01); + /** + * @brief Destroy the bloom filter object + */ ~bloom_filter() = default; public: /** - * 添加元素 + * @brief Add an element to the bloom filter + * + * @param value The string element to add */ void add(const std::string& value); /** - * 判断元素是否存在(可能有误判) + * @brief Check if an element may be in the bloom filter + * + * Note: This may return true for elements not actually inserted (false positive) + * but will never return false for elements that were inserted. + * + * @param value The string element to check + * @return true If the element is probably in the set + * @return false If the element is definitely not in the set */ bool contains(const std::string& value) const; /** - * 清空布隆过滤器 + * @brief Clear all elements from the bloom filter + * + * Resets all bits in the underlying bitmap to 0. 
*/ void clear() { bitmap_->reset(); } // /** - // * 获取位图中设置位的数量(测试函数,实际业务中不建议使用) + // * Get the number of set bits in the bitmap (test function, not recommended for production) // */ // size_t test_getSetBitCount() const; - // // 估算元素数量(测试函数,实际业务中不建议使用) + // // Estimate the number of elements (test function, not recommended for production) // size_t test_estimateCount() const; - // // 获取相对误差(测试函数,实际业务中不建议使用) + // // Get the relative error (test function, not recommended for production) // double test_getRelativeError(); private: - // 双重哈希函数,生成k个哈希值 + /** + * @brief Double hashing function to generate k hash values + * + * Implements a double hashing technique to generate multiple hash values + * using two base hash functions. + * + * @param str The string to hash + * @return std::pair Two base hash values used to generate k hashes + */ std::pair hash(const std::string& str) const; private: - size_t size_; - std::shared_ptr bitmap_; - size_t hash_size_; + size_t size_; ///< Total number of bits in the bitmap + std::shared_ptr bitmap_; ///< Underlying bitmap storage + size_t hash_size_; ///< Number of hash functions to use }; /** - * @brief 计算最优哈希函数数量 + * @brief Calculate the optimal number of hash functions * - * @param m 位数组大小(比特数) - * @param n 预期插入元素数量 - * @return size_t + * @param m Size of the bit array (in bits) + * @param n Expected number of inserted elements + * @return size_t Optimal number of hash functions */ size_t bloom_optimal_k(size_t m, size_t n); /** - * @brief 计算最优位数组大小 + * @brief Calculate the optimal size of the bit array * - * @param n 预期插入元素数量 - * @param p 误判率 - * @return size_t + * @param n Expected number of inserted elements + * @param p Desired false positive rate (0 < p < 1) + * @return size_t Optimal bit array size in bits */ size_t bloom_optimal_m(size_t n, double p); /** - * @brief 计算实际误判率 + * @brief Calculate the actual false positive rate * - * @param m 位数组大小(比特数) - * @param n 预期插入元素数量 - * @param k 哈希函数数量 - * @return double + * @param 
m Size of the bit array (in bits) + * @param n Expected number of inserted elements + * @param k Number of hash functions + * @return double Actual false positive rate */ double bloom_false_positive_rate(size_t m, size_t n, size_t k); /** - * @brief 综合计算所有最优参数 + * @brief Comprehensive calculation of all optimal parameters * - * @param n 预期的元素数量 - * @param target_fp_rate 目标误判率 - * @param optimal_m [out] 最优位数组大小 - * @param optimal_k [out] 最优哈西函数数量 - * @param actual_fp_rate [out] 实际误判率 + * @param n Expected number of elements + * @param target_fp_rate Target false positive rate + * @param optimal_m [out] Calculated optimal bit array size + * @param optimal_k [out] Calculated optimal number of hash functions + * @param actual_fp_rate [out] Actual false positive rate with these parameters */ void bloom_optimal_parameters(size_t n, double target_fp_rate, -- Gitee From 236cfbed61056264811c4384f0e3765f6abf747a Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 18 Oct 2025 11:17:15 +0800 Subject: [PATCH 36/40] feat: add hyperloglog --- include/common_util/hyperloglog.h | 57 +++++++++ src/common_util/hyperloglog.cpp | 190 ++++++++++++++++++++++++++++++ src/usage_demo/hyperloglog.hpp | 109 +++++++++++++++++ src/usage_demo/main.cpp | 4 +- 4 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 include/common_util/hyperloglog.h create mode 100644 src/common_util/hyperloglog.cpp create mode 100644 src/usage_demo/hyperloglog.hpp diff --git a/include/common_util/hyperloglog.h b/include/common_util/hyperloglog.h new file mode 100644 index 0000000..58a2296 --- /dev/null +++ b/include/common_util/hyperloglog.h @@ -0,0 +1,57 @@ +#include "hash.h" +#include +#include +#include +#include + +namespace cutl +{ + +class HyperLogLog +{ +public: + // 构造函数 + explicit HyperLogLog(int precision = 14); + + ~HyperLogLog() { registers_.clear(); } + +public: + // 添加uint64_t类型的元素 + void add(uint64_t value); + + // 添加字符串类型的元素 + void add(const std::string& value); + + // 估算基数(返回64位整数) + 
uint64_t count() const; + + // 重置所有寄存器 + void reset() { std::fill(registers_.begin(), registers_.end(), 0); } + + // 合并另一个 HyperLogLog + void merge(const HyperLogLog& other); + + std::string to_string() const { return this->to_hex(1); } + + void from_string(const std::string& text); + + // 获取理论误差 + double get_theoretical_error() const { return 1.04 / std::sqrt(m_); } + +private: + // 计算 alpha 常数 + double compute_alpha() const; + + // 统计前导零的数量(64位版本) + int countLeading_zeros(uint64_t bits) const; + + std::string to_hex(int compress = 1) const; + +private: + int p_; + int m_; + std::vector registers_; + double alpha_; +}; + +} // namespace cutl diff --git a/src/common_util/hyperloglog.cpp b/src/common_util/hyperloglog.cpp new file mode 100644 index 0000000..d473fe4 --- /dev/null +++ b/src/common_util/hyperloglog.cpp @@ -0,0 +1,190 @@ +#include "hyperloglog.h" +#include "hash.h" +#include "strfmt.h" +#include +#include + +namespace cutl +{ + +// 构造函数 +HyperLogLog::HyperLogLog(int precision) + : p_(precision) + , m_(1 << precision) + , registers_(m_, 0) +{ + if (p_ < 4 || p_ > 20) + { + throw std::invalid_argument("Precision must be between 4 and 20"); + } + alpha_ = compute_alpha(); +} + +// 添加uint64_t类型的元素 +void HyperLogLog::add(uint64_t value) +{ + // 前 p 位作为桶索引 + uint64_t index = value >> (64 - p_); + + // 后 (64-p) 位统计前导零 + uint64_t bits = value & ((1ULL << (64 - p_)) - 1); + int leadingZeros = countLeading_zeros(bits) + 1; + + if (leadingZeros > registers_[index]) + { + registers_[index] = static_cast(leadingZeros); + } +} + +// 添加字符串类型的元素 +void HyperLogLog::add(const std::string& value) +{ + uint64_t hashValue = cutl::hash_fnv1a_64(value); + add(hashValue); +} + +// 估算基数(返回64位整数) +uint64_t HyperLogLog::count() const +{ + double sum = 0.0; + int zeroCount = 0; + + for (int i = 0; i < m_; i++) + { + if (registers_[i] == 0) + { + zeroCount++; + } + sum += std::pow(2.0, -static_cast(registers_[i])); + } + + // 基础估算 + double estimate = (alpha_ * m_ * m_) / sum; + + // 
小范围数值 修正阈值 + const uint64_t SMALL_CORRECTION_THRESHOLD = 2.5 * m_; + // 大范围数值 修正阈值 + const uint64_t LARGE_CORRECTION_THRESHOLD = (1ULL << 32); + // 64位版本的修正策略 + if (estimate <= SMALL_CORRECTION_THRESHOLD) + { + if (zeroCount > 0) + { + estimate = m_ * std::log(static_cast(m_) / zeroCount); + } + } + else if (estimate > LARGE_CORRECTION_THRESHOLD) + { + estimate = + -LARGE_CORRECTION_THRESHOLD * std::log(1.0 - estimate / LARGE_CORRECTION_THRESHOLD); + } + + return static_cast(std::round(estimate)); +} + +// 合并另一个 HyperLogLog +void HyperLogLog::merge(const HyperLogLog& other) +{ + if (m_ != other.m_) + { + throw std::invalid_argument("Precision mismatch"); + } + + for (int i = 0; i < m_; i++) + { + if (other.registers_[i] > registers_[i]) + { + registers_[i] = other.registers_[i]; + } + } +} + +void HyperLogLog::from_string(const std::string& text) +{ + for (char c : text) + { + if (!isxdigit(c)) + { + throw std::runtime_error("Invalid hexadecimal string"); + } + } + + // 先将bitmap所有数据重置成0 + this->reset(); + + // 每两个字符表示一个字节 + const size_t expectedLength = registers_.size() * 2; + size_t strLen = std::min(expectedLength, text.length()); + size_t byteSize = strLen >> 1; // strLen / 2 + // 将十六进制字符串转换为字节数组 + for (size_t i = 0; i < byteSize; i++) + { + std::string hexByte = text.substr(i * 2, 2); + registers_[i] = std::stoi(hexByte, nullptr, 16); + } +} + +// 计算 alpha 常数 +double HyperLogLog::compute_alpha() const +{ + switch (m_) + { + case 16: + return 0.673; + case 32: + return 0.697; + case 64: + return 0.709; + default: + return 0.7213 / (1.0 + 1.079 / m_); + } +} + +// 统计前导零的数量(64位版本) +int HyperLogLog::countLeading_zeros(uint64_t bits) const +{ + if (bits == 0) + return 64 - p_; + + const int totalBits = 64 - p_; + int position = totalBits - 1; + + while (position >= 0 && (bits & (1ULL << position)) == 0) + { + position--; + } + + return totalBits - (position + 1); +} + +std::string HyperLogLog::to_hex(int compress) const +{ + if (compress == 0) + { + return 
cutl::to_hex(registers_.data(), this->m_, true, 0); + } + else + { + // 找到最后一个非零字节的索引 + int lastNonZeroIndex = 0; + for (int i = registers_.size() - 1; i >= 0; i--) + { + if (registers_[i] != 0) + { + lastNonZeroIndex = i; + break; + } + } + + // 如果所有字节都是零,返回空字符串 + if (lastNonZeroIndex == 0) + { + return ""; + } + + // 只序列化到最后一个非零字节 + return cutl::to_hex(registers_.data(), lastNonZeroIndex + 1, true, 0); + } +} + +} // namespace cutl diff --git a/src/usage_demo/hyperloglog.hpp b/src/usage_demo/hyperloglog.hpp new file mode 100644 index 0000000..7ddbb35 --- /dev/null +++ b/src/usage_demo/hyperloglog.hpp @@ -0,0 +1,109 @@ +#include "common.hpp" +#include "common_util/hyperloglog.h" + +// 测试函数 +void testUsage() +{ + std::cout << "------ 基础功能使用 ------" << std::endl; + cutl::HyperLogLog hll(15); + hll.add("abc"); + hll.add("def"); + // hll.add("ghijk"); + hll.add("123"); + std::cout << "count1: " << hll.count() << std::endl; + hll.add("abc"); + std::cout << "count2: " << hll.count() << std::endl; +} + +void testMerge() +{ + std::cout << "------ merge功能演示 ------" << std::endl; + cutl::HyperLogLog hll_01(14); + cutl::HyperLogLog hll_02(14); + + for (int i = 0; i < 1000; i++) + { + hll_01.add("element-" + std::to_string(i) + "-" + std::to_string(rand())); + hll_02.add("element-" + std::to_string(i) + "-" + std::to_string(rand())); + } + + std::cout << "hll_01 实际值: 1000, 估算值: " << hll_01.count() << std::endl; + std::cout << "hll_02 实际值: 1000, 估算值: " << hll_02.count() << std::endl; + hll_01.merge(hll_02); + std::cout << "合并后 hll_01 估算值: " << hll_01.count() << std::endl; +} + +// 生成64位随机哈希(用于测试) +uint64_t generateRandomHash() +{ + static std::random_device rd; + static std::mt19937_64 gen(rd()); + static std::uniform_int_distribution dis(1, 10000000); + return dis(gen); +} + +// 测试函数 +void test64BitPerformance() +{ + std::cout << "====== 64位 cutl::HyperLogLog 性能测试 ======" << std::endl; + + cutl::HyperLogLog hll(14); + const uint64_t actualCount = 10000; // 1000万 + + std::cout 
<< "测试数据量: " << actualCount << std::endl; + std::cout << "哈希空间: 2^64 = " << ((1ULL << 63) * 2 - 1) << std::endl; + + auto start = std::chrono::high_resolution_clock::now(); + + for (uint64_t i = 0; i < actualCount; i++) + { + // 使用64位哈希值直接添加 + std::string str = + std::string("element_") + std::to_string(i) + "_" + std::to_string(generateRandomHash()); + hll.add(std::move(str)); + } + + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(end - start); + + uint64_t estimated = hll.count(); + double error = std::abs(static_cast(estimated) - actualCount) / actualCount; + + std::cout << "实际基数: " << actualCount << std::endl; + std::cout << "估计基数: " << estimated << std::endl; + std::cout << "相对误差: " << std::fixed << std::setprecision(4) << error * 100 << "%" + << std::endl; + std::cout << "理论误差: " << std::fixed << std::setprecision(4) + << hll.get_theoretical_error() * 100 << "%" << std::endl; + std::cout << "处理时间: " << duration.count() << "ms" << std::endl; +} + +void testCollisionResistance() +{ + std::cout << "\n====== 碰撞抵抗测试 ======" << std::endl; + + cutl::HyperLogLog hll(12); + + // 测试大量相似字符串:100万 + for (int i = 0; i < 1000000; i++) + { + hll.add("user_" + std::to_string(i) + "_session_" + std::to_string(rand())); + } + + std::cout << "100万相似字符串估计: " << hll.count() << std::endl; +} + +void TestHyperLogLog() +{ + try + { + testUsage(); + testMerge(); + test64BitPerformance(); + testCollisionResistance(); + } + catch (const std::exception& e) + { + std::cerr << "错误: " << e.what() << std::endl; + } +} \ No newline at end of file diff --git a/src/usage_demo/main.cpp b/src/usage_demo/main.cpp index 4643bbf..5a141be 100644 --- a/src/usage_demo/main.cpp +++ b/src/usage_demo/main.cpp @@ -9,6 +9,7 @@ #include "filepath.hpp" #include "fileutil.hpp" #include "hash.hpp" +#include "hyperloglog.hpp" #include "lrucache.hpp" #include "print.hpp" #include "singleton.hpp" @@ -58,7 +59,8 @@ int main(int argc, char* argv[]) // 
TestAlgorithmUtil(); // BitmapTest(); // TestHash(); - TestBloomFilter(); + // TestBloomFilter(); + TestHyperLogLog(); // usage_demo(); -- Gitee From 578555b29f901d332d3dfc785f9b198bd9d8dd66 Mon Sep 17 00:00:00 2001 From: Spencer Date: Sat, 18 Oct 2025 11:53:42 +0800 Subject: [PATCH 37/40] fix: bitmap hyperloglog --- include/common_util/hyperloglog.h | 113 ++++++++++++++++++++++++++---- src/common_util/bitmap.cpp | 62 +++++++++++----- src/common_util/hyperloglog.cpp | 13 +++- src/usage_demo/hyperloglog.hpp | 71 +++++++++---------- 4 files changed, 188 insertions(+), 71 deletions(-) diff --git a/include/common_util/hyperloglog.h b/include/common_util/hyperloglog.h index 58a2296..1993b4f 100644 --- a/include/common_util/hyperloglog.h +++ b/include/common_util/hyperloglog.h @@ -1,4 +1,24 @@ -#include "hash.h" +/** + * @copyright Copyright (c) 2025, Spencer.Luo. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations. + * + * @file hyperloglog.h + * @brief HyperLogLog algorithm implementation for approximate cardinality counting + * @author Spencer + * @date 2025-10-12 + */ + +#include "hash.h" #include #include #include @@ -7,51 +27,114 @@ namespace cutl { +/** + * @brief HyperLogLog algorithm implementation + * + * This class provides an efficient implementation of the HyperLogLog algorithm, + * which is used for approximating the number of distinct elements (cardinality) + * in a multiset with high accuracy and low memory usage. 
+ */ class HyperLogLog { public: - // 构造函数 + /** + * @brief Construct a new HyperLogLog object + * + * @param precision The precision parameter (p), which determines the number of registers + * and the accuracy. Typical values range from 4 to 16. Defaults to 14. + */ explicit HyperLogLog(int precision = 14); + /** + * @brief Destroy the HyperLogLog object + */ ~HyperLogLog() { registers_.clear(); } public: - // 添加uint64_t类型的元素 + /** + * @brief Add a 64-bit integer element to the HyperLogLog structure + * + * @param value The 64-bit integer to add + */ void add(uint64_t value); - // 添加字符串类型的元素 + /** + * @brief Add a string element to the HyperLogLog structure + * + * @param value The string to add + */ void add(const std::string& value); - // 估算基数(返回64位整数) + /** + * @brief Estimate the cardinality of the set + * + * @return uint64_t The approximate number of distinct elements + */ uint64_t count() const; - // 重置所有寄存器 + /** + * @brief Reset all registers to their initial state + */ void reset() { std::fill(registers_.begin(), registers_.end(), 0); } - // 合并另一个 HyperLogLog + /** + * @brief Merge another HyperLogLog structure into this one + * + * @param other The HyperLogLog instance to merge with + */ void merge(const HyperLogLog& other); + /** + * @brief Convert the HyperLogLog structure to a string representation + * + * @return std::string Compressed hex string representation + */ std::string to_string() const { return this->to_hex(1); } + /** + * @brief Initialize the HyperLogLog structure from a string representation + * + * @param text The string to parse and load + */ void from_string(const std::string& text); - // 获取理论误差 + /** + * @brief Get the theoretical error margin of the cardinality estimate + * + * @return double The theoretical error (approximately 1.04 / sqrt(m)) + */ double get_theoretical_error() const { return 1.04 / std::sqrt(m_); } private: - // 计算 alpha 常数 + /** + * @brief Compute the alpha constant used in cardinality estimation + * + * The 
alpha constant depends on the number of registers (m). + * @return double The computed alpha value + */ double compute_alpha() const; - // 统计前导零的数量(64位版本) + /** + * @brief Count the number of leading zeros in a 64-bit integer + * + * @param bits The 64-bit integer to analyze + * @return int The number of leading zeros + */ int countLeading_zeros(uint64_t bits) const; + /** + * @brief Convert the HyperLogLog structure to a hexadecimal string + * + * @param compress 0 for uncompressed, 1 for compressed (truncates trailing zeros) + * @return std::string Hexadecimal string representation + */ std::string to_hex(int compress = 1) const; private: - int p_; - int m_; - std::vector registers_; - double alpha_; + int p_; ///< Precision parameter, determines the number of registers (m = 2^p) + int m_; ///< Number of registers (m = 2^p) + std::vector registers_; ///< Array of registers storing maximum leading zeros + double alpha_; ///< Alpha constant used in cardinality estimation formula }; -} // namespace cutl +} // namespace cutl \ No newline at end of file diff --git a/src/common_util/bitmap.cpp b/src/common_util/bitmap.cpp index 5042787..b6cbd53 100644 --- a/src/common_util/bitmap.cpp +++ b/src/common_util/bitmap.cpp @@ -2,9 +2,7 @@ #include "inner/logger.h" #include "strfmt.h" #include -// #include #include -// #include #include namespace cutl @@ -25,7 +23,9 @@ void bitmap::set(size_t position) { if (position >= size_) { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + auto errMsg = "Position " + std::to_string(position) + " out of range"; + CUTL_ERROR(errMsg); + throw std::out_of_range(errMsg); } size_t byteIndex = position >> 3; // 等价于 position / 8 @@ -41,7 +41,9 @@ bool bitmap::get(size_t position) const // printf("position:%d, size_:%d\n", position, size_); if (position >= size_) { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + auto errMsg = "Position " + std::to_string(position) + " out 
of range"; + CUTL_ERROR(errMsg); + throw std::out_of_range(errMsg); } size_t byteIndex = position >> 3; // 等价于 position / 8 size_t bitIndex = position & 0x7; // 等价于 position % 8 @@ -55,7 +57,9 @@ void bitmap::reset(size_t position) { if (position >= size_) { - throw std::out_of_range("Position " + std::to_string(position) + " out of range"); + auto errMsg = "Position " + std::to_string(position) + " out of range"; + CUTL_ERROR(errMsg); + throw std::out_of_range(errMsg); } size_t byteIndex = position >> 3; // 等价于 position / 8 size_t bitIndex = position & 0x7; // 等价于 position % 8 @@ -137,7 +141,9 @@ void bitmap::from_string(const std::string& text) { if (!isxdigit(c)) { - throw std::runtime_error("Invalid hexadecimal string"); + std::string errMsg("Invalid hexadecimal string"); + CUTL_ERROR(errMsg); + throw std::runtime_error(errMsg); } } @@ -263,7 +269,9 @@ bitmap& bitmap::operator&=(const bitmap& other) { if (size_ != other.size_) { - throw std::invalid_argument("Bitmaps must have same size"); + std::string errMsg("Bitmaps must have same size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (size_t i = 0; i < bits_.size(); i++) { @@ -276,7 +284,9 @@ bitmap& bitmap::operator|=(const bitmap& other) { if (size_ != other.size_) { - throw std::invalid_argument("Bitmaps must have same size"); + std::string errMsg("Bitmaps must have same size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (size_t i = 0; i < bits_.size(); i++) { @@ -289,7 +299,9 @@ bitmap& bitmap::operator^=(const bitmap& other) { if (size_ != other.size_) { - throw std::invalid_argument("Bitmaps must have same size"); + std::string errMsg("Bitmaps must have same size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (size_t i = 0; i < bits_.size(); i++) { @@ -448,7 +460,9 @@ void roaring_bitmap::reset(size_t position) auto itr = container_.find(key); if (itr == container_.end()) { - throw std::out_of_range("Position " + 
std::to_string(position) + " not in container"); + auto errMsg = "Position " + std::to_string(position) + " not in container"; + CUTL_ERROR(errMsg); + throw std::out_of_range(errMsg); } itr->second.reset(bitPosition); @@ -523,7 +537,9 @@ void roaring_bitmap::from_string(const std::string& text) // 简单校验格式({...}结构) if (text.empty() || text.front() != '{' || text.back() != '}') { - throw std::runtime_error("Invalid roaring bitmap string format"); + std::string errMsg("Invalid roaring bitmap string format"); + CUTL_ERROR(errMsg); + throw std::runtime_error(errMsg); } // 提取中间内容 @@ -636,7 +652,9 @@ roaring_bitmap roaring_bitmap::operator&(const roaring_bitmap& other) const { if (block_size() != other.block_size()) { - throw std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) @@ -661,7 +679,9 @@ roaring_bitmap roaring_bitmap::operator|(const roaring_bitmap& other) const { if (block_size() != other.block_size()) { - throw std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } roaring_bitmap rBitmap(block_size_); @@ -717,7 +737,9 @@ roaring_bitmap roaring_bitmap::operator^(const roaring_bitmap& other) const { if (block_size() != other.block_size()) { - throw std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } roaring_bitmap rBitmap(block_size_); for (auto itr = container_.begin(); itr != container_.end(); itr++) @@ -755,7 +777,9 @@ roaring_bitmap& roaring_bitmap::operator&=(const roaring_bitmap& other) { if (block_size() != other.block_size()) { - throw 
std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } // 使用迭代器遍历,通过 erase 的返回值更新迭代器 @@ -794,7 +818,9 @@ roaring_bitmap& roaring_bitmap::operator|=(const roaring_bitmap& other) { if (block_size() != other.block_size()) { - throw std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (auto itr = container_.begin(); itr != container_.end(); itr++) @@ -826,7 +852,9 @@ roaring_bitmap& roaring_bitmap::operator^=(const roaring_bitmap& other) { if (block_size() != other.block_size()) { - throw std::invalid_argument("RoaringBitmap must have same block_size"); + std::string errMsg("RoaringBitmap must have same block_size"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (auto itr = container_.begin(); itr != container_.end(); itr++) diff --git a/src/common_util/hyperloglog.cpp b/src/common_util/hyperloglog.cpp index d473fe4..9116114 100644 --- a/src/common_util/hyperloglog.cpp +++ b/src/common_util/hyperloglog.cpp @@ -1,5 +1,6 @@ #include "hyperloglog.h" #include "hash.h" +#include "inner/logger.h" #include "strfmt.h" #include #include @@ -15,7 +16,9 @@ HyperLogLog::HyperLogLog(int precision) { if (p_ < 4 || p_ > 20) { - throw std::invalid_argument("Precision must be between 4 and 20"); + std::string errMsg = "Precision must be between 4 and 20"; + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } alpha_ = compute_alpha(); } @@ -87,7 +90,9 @@ void HyperLogLog::merge(const HyperLogLog& other) { if (m_ != other.m_) { - throw std::invalid_argument("Precision mismatch"); + std::string errMsg("Precision mismatch"); + CUTL_ERROR(errMsg); + throw std::invalid_argument(errMsg); } for (int i = 0; i < m_; i++) @@ -105,7 +110,9 @@ void HyperLogLog::from_string(const 
std::string& text) { if (!isxdigit(c)) { - throw std::runtime_error("Invalid hexadecimal string"); + std::string errMsg("Invalid hexadecimal string"); + CUTL_ERROR(errMsg); + throw std::runtime_error(errMsg); } } diff --git a/src/usage_demo/hyperloglog.hpp b/src/usage_demo/hyperloglog.hpp index 7ddbb35..e88c6ff 100644 --- a/src/usage_demo/hyperloglog.hpp +++ b/src/usage_demo/hyperloglog.hpp @@ -1,10 +1,12 @@ #include "common.hpp" #include "common_util/hyperloglog.h" +#include "common_util/timecount.h" -// 测试函数 -void testUsage() +// Test function +static void TestBasicUsage() { - std::cout << "------ 基础功能使用 ------" << std::endl; + PrintSubTitle("basic usage"); + cutl::HyperLogLog hll(15); hll.add("abc"); hll.add("def"); @@ -15,9 +17,10 @@ void testUsage() std::cout << "count2: " << hll.count() << std::endl; } -void testMerge() +static void TestMerge() { - std::cout << "------ merge功能演示 ------" << std::endl; + PrintSubTitle("TestMerge"); + cutl::HyperLogLog hll_01(14); cutl::HyperLogLog hll_02(14); @@ -27,13 +30,13 @@ void testMerge() hll_02.add("element-" + std::to_string(i) + "-" + std::to_string(rand())); } - std::cout << "hll_01 实际值: 1000, 估算值: " << hll_01.count() << std::endl; - std::cout << "hll_02 实际值: 1000, 估算值: " << hll_02.count() << std::endl; + std::cout << "hll_01 actual value: 1000, estimated value: " << hll_01.count() << std::endl; + std::cout << "hll_02 actual value: 1000, estimated value: " << hll_02.count() << std::endl; hll_01.merge(hll_02); - std::cout << "合并后 hll_01 估算值: " << hll_01.count() << std::endl; + std::cout << "hll_01 estimated value after merging: " << hll_01.count() << std::endl; } -// 生成64位随机哈希(用于测试) +// Generate 64-bit random hash (for testing) uint64_t generateRandomHash() { static std::random_device rd; @@ -42,68 +45,64 @@ uint64_t generateRandomHash() return dis(gen); } -// 测试函数 -void test64BitPerformance() +// Test function +static void TestPerformance() { - std::cout << "====== 64位 cutl::HyperLogLog 性能测试 ======" << std::endl; + 
PrintSubTitle("TestPerformance"); - cutl::HyperLogLog hll(14); - const uint64_t actualCount = 10000; // 1000万 + cutl::timecount counter("TestPerformance"); - std::cout << "测试数据量: " << actualCount << std::endl; - std::cout << "哈希空间: 2^64 = " << ((1ULL << 63) * 2 - 1) << std::endl; + cutl::HyperLogLog hll(14); + const uint64_t actualCount = 10000; // 10 thousand - auto start = std::chrono::high_resolution_clock::now(); + std::cout << "Test data volume: " << actualCount << std::endl; for (uint64_t i = 0; i < actualCount; i++) { - // 使用64位哈希值直接添加 + // Build a distinct string from the index and a random 64-bit value, then add it std::string str = std::string("element_") + std::to_string(i) + "_" + std::to_string(generateRandomHash()); hll.add(std::move(str)); } - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end - start); - uint64_t estimated = hll.count(); double error = std::abs(static_cast(estimated) - actualCount) / actualCount; - std::cout << "实际基数: " << actualCount << std::endl; - std::cout << "估计基数: " << estimated << std::endl; - std::cout << "相对误差: " << std::fixed << std::setprecision(4) << error * 100 << "%" + std::cout << "Actual cardinality: " << actualCount << std::endl; + std::cout << "Estimated cardinality: " << estimated << std::endl; + std::cout << "Actual error: " << std::fixed << std::setprecision(4) << error * 100 << "%" << std::endl; - std::cout << "理论误差: " << std::fixed << std::setprecision(4) + std::cout << "Theoretical error: " << std::fixed << std::setprecision(4) << hll.get_theoretical_error() * 100 << "%" << std::endl; - std::cout << "处理时间: " << duration.count() << "ms" << std::endl; } -void testCollisionResistance() +static void TestCollisionResistance() { - std::cout << "\n====== 碰撞抵抗测试 ======" << std::endl; + PrintSubTitle("TestCollisionResistance"); + // Test a large number of similar strings: 1 million cutl::HyperLogLog hll(12); - - // 测试大量相似字符串:100万 for (int i = 0; i < 1000000; i++) { hll.add("user_" + std::to_string(i) + 
"_session_" + std::to_string(rand())); } - std::cout << "100万相似字符串估计: " << hll.count() << std::endl; + std::cout << "Estimation for 1 million similar strings: " << hll.count() << std::endl; } void TestHyperLogLog() { + PrintTitle("TestHyperLogLog"); + try { - testUsage(); - testMerge(); - test64BitPerformance(); - testCollisionResistance(); + TestBasicUsage(); + TestMerge(); + TestPerformance(); + TestCollisionResistance(); } catch (const std::exception& e) { - std::cerr << "错误: " << e.what() << std::endl; + std::cerr << "Error: " << e.what() << std::endl; } } \ No newline at end of file -- Gitee From 36c4fe3b8f995ed36fda2a18c0fe81e6d09579fb Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 23 Oct 2025 12:38:35 +0800 Subject: [PATCH 38/40] fix: common_util.h and README.md --- README.md | 120 ++++++++++++++++-------------- include/common_util/common_util.h | 13 +++- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index ba3abb9..6c553bd 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ @mainpage 
+[![zread](https://img.shields.io/badge/Ask_Zread-_.svg?style=flat&color=00b0aa&labelColor=000000&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZD0iTTQuOTYxNTYgMS42MDAxSDIuMjQxNTZDMS44ODgxIDEuNjAwMSAxLjYwMTU2IDEuODg2NjQgMS42MDE1NiAyLjI0MDFWNC45NjAxQzEuNjAxNTYgNS4zMTM1NiAxLjg4ODEgNS42MDAxIDIuMjQxNTYgNS42MDAxSDQuOTYxNTZDNS4zMTUwMiA1LjYwMDEgNS42MDE1NiA1LjMxMzU2IDUuNjAxNTYgNC45NjAxVjIuMjQwMUM1LjYwMTU2IDEuODg2NjQgNS4zMTUwMiAxLjYwMDEgNC45NjE1NiAxLjYwMDFaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00Ljk2MTU2IDEwLjM5OTlIMi4yNDE1NkMxLjg4ODEgMTAuMzk5OSAxLjYwMTU2IDEwLjY4NjQgMS42MDE1NiAxMS4wMzk5VjEzLjc1OTlDMS42MDE1NiAxNC4xMTM0IDEuODg4MSAxNC4zOTk5IDIuMjQxNTYgMTQuMzk5OUg0Ljk2MTU2QzUuMzE1MDIgMTQuMzk5OSA1LjYwMTU2IDE0LjExMzQgNS42MDE1NiAxMy43NTk5VjExLjAzOTlDNS42MDE1NiAxMC42ODY0IDUuMzE1MDIgMTAuMzk5OSA0Ljk2MTU2IDEwLjM5OTlaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik0xMy43NTg0IDEuNjAwMUgxMS4wMzg0QzEwLjY4NSAxLjYwMDEgMTAuMzk4NCAxLjg4NjY0IDEwLjM5ODQgMi4yNDAxVjQuOTYwMUMxMC4zOTg0IDUuMzEzNTYgMTAuNjg1IDUuNjAwMSAxMS4wMzg0IDUuNjAwMUgxMy43NTg0QzE0LjExMTkgNS42MDAxIDE0LjM5ODQgNS4zMTM1NiAxNC4zOTg0IDQuOTYwMVYyLjI0MDFDMTQuMzk4NCAxLjg4NjY0IDE0LjExMTkgMS42MDAxIDEzLjc1ODQgMS42MDAxWiIgZmlsbD0iI2ZmZiIvPgo8cGF0aCBkPSJNNCAxMkwxMiA0TDQgMTJaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00IDEyTDEyIDQiIHN0cm9rZT0iI2ZmZiIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIvPgo8L3N2Zz4K&logoColor=ffffff)](https://zread.ai/spencer-luo/common_util) + # 1. English ## 1.1. Source code @@ -13,7 +15,7 @@ common_util is a general - purpose utility library for C++. It is lightweight and easy to operate. -common_util is implemented using modern C++ syntax (C++11 and above), with "cutl" as the namespace. The naming convention of all its interfaces is consistent with that of the STL, and it can be used as an extensions of C++ STL. 
You can use common_util just like using the STL, For example: +common_util is implemented using modern C++ syntax (C++11), with "cutl" as the namespace. The naming convention of all its interfaces is consistent with that of the STL, and it can be used as an extension of the C++ STL. You can use common_util just as you would use the STL. For example: **Usage demo:** @@ -44,35 +46,40 @@ current time(local time): 2025-05-02 18:29:10.785 - **Multi-platform support**: Supports multiple platforms such as Linux/macOS/Windows, it can be compiled and run on multiple platforms. - **Clean code**: The code style is clean, and the naming convention is consistent with the STL, you can use this library just like use the C++ standard library. - **Comprehensive documentation**: All exported header files have detail comments, it has complete API reference and usage demos. -- **C++ version requirement**: C++11 and above versions. +- **C++ version requirement**: This repository is implemented in C++11 and supports projects built with C++11 or later. ## 1.4. Module Description -| Header File | Main Function | -| :-- | :-- | -| `algoutil.h` | A supplement to `<algorithm>`, providing some commonly used algorithm functions, such as those that were not available in C++11 but have been added in later versions. | -| `color.h` | A class for encapsulating ARGB color values. | -| `common_util.h` | The import header file of common_util contains all other header files. | -| `config.h` | Provides the library initialization function, as well as functions to obtain the library version and name. | -| `datetime.h` | A simple date and time class based on the system clock. | -| `dlloader.h` | A dynamic loader for dynamic libraries (shared libraries). | -| `eventloop.h` | An event loop that supports ordinary tasks and timer tasks (timer tasks support specifying the number of executions and cancellation). 
The execution of tasks comes in single-thread(`eventloop`) and multi-thread(`multithread_eventloop`) versions. | -| `filepath.h` | Operations on file paths, such as file path concatenation and file type judgment. | -| `filetype.h` | Definitions of file types. | -| `fileutil.h` | Operations on the file system, such as file creation/deletion, file reading/writing, etc. | -| `logtype.h` | Type definitions related to log recording. | -| `print.h` | Helper functions for console printing, including printing arrays、vectors、maps of basic types, and printing colored strings. | -| `singleton.h` | Template classes and macro definitions for the singleton pattern. | -| `strfmt.h` | String formatting, such as time formatting, file size formatting, converting int to hexadecimal, etc. | -| `strutil.h` | Utility functions for string operations, such as case conversion, string split, string join, etc. | -| `sysutil.h` | System utility functions, such as system calls, obtaining the CPU architecture/endianness, etc. | -| `threadpool.h` | A thread pool, a lightweight and simple implementation of a thread pool. | -| `threadutil.h` | Thread - related utility functions, such as setting the thread name, obtaining the thread ID, etc. | -| `timecount.h` | A time counter for measuring the running time of functions. | -| `timer.h` | A timer that supports single-task timer (delayed execution) and repeating-task timer (periodic execution). | -| `timeutil.h` | Utility functions for time processing, such as time unit conversion, obtaining timestamps, etc. | -| `version.h` | Definition of the version number of the common_util library. | -| `verutil.h` | Version number processing, such as parsing version number substrings from strings, version number comparison, etc. 
| +| Functional Category | Header File | Main Functions | +| :------------ | :-------------- | :----------------------------------------------------------------------------------------------------- | +| File Operations | `filepath.h` | Operations on file paths, such as path concatenation and file type determination. | +| File Operations | `filetype.h` | Definition of file types. | +| File Operations | `fileutil.h` | Operations on the file system, such as file creation/deletion, file reading/writing, etc. | +| String Processing | `strfmt.h` | String formatting, such as time formatting, file size formatting, conversion of int to hexadecimal, etc. | +| String Processing | `strutil.h` | Utility functions for string operations, such as case conversion, string splitting, string concatenation, etc. | +| String Processing | `verutil.h` | Version number handling, such as parsing version number substrings from strings and version number comparison. | +| Date and Time | `datetime.h` | A simple date and time class based on the system clock. | +| Date and Time | `timecount.h` | A timer for measuring the running time of functions. | +| Date and Time | `timer.h` | Timers, supporting single-task timers (delayed execution) and repeating-task timers (periodic execution). | +| Date and Time | `timeutil.h` | Utility functions for time processing, such as time unit conversion and obtaining timestamps. | +| Concurrent Programming | `threadpool.h` | Thread pool, a lightweight and simple implementation of a thread pool. | +| Concurrent Programming | `threadutil.h` | Utility functions related to threads, such as setting thread names and obtaining thread IDs. | +| Concurrent Programming | `eventloop.h` | Event loop, supporting normal tasks and timed tasks (timed tasks support specifying the number of executions and cancellation). Task execution comes in two versions: single-thread (`eventloop`) and multi-thread (`multithread_eventloop`). 
| +| System Utilities | `sysutil.h` | System utility functions, such as system calls, obtaining CPU architecture/endianness, etc. | +| System Utilities | `dlloader.h` | Dynamic loader for dynamic libraries (shared libraries). | +| Common Algorithms | `algoutil.h` | A supplement to `<algorithm>`, providing some commonly used algorithm functions, such as those not available in C++11 but added in later versions. | +| Common Algorithms | `lrucache.h` | High-performance LRU algorithm template class with an average time complexity of `O(1)` for both `get` and `put`. | +| Common Algorithms | `hash.h` | Provides common hash function algorithms. | +| Common Algorithms | `bitmap.h` | An efficient Bitmap data structure class that also provides multiple variant subtypes: `dynamic_bitmap`, `roaring_bitmap`, etc. | +| Common Algorithms | `bloomfilter.h` | Bloom filter algorithm. | +| Common Algorithms | `hyperloglog.h` | HyperLogLog algorithm. | +| Logging and Printing | `print.h` | Helper functions for console printing, including printing arrays, vectors, maps of basic types, and printing strings with colors. | +| Type Wrappers | `color.h` | Encapsulation class for ARGB color values. | +| Auxiliary Programming | `singleton.h` | Template class and macro definitions for the singleton pattern. | +| Others | `logtype.h` | Type definitions related to log records. | +| Others | `config.h` | Provides library initialization functions, and functions to obtain the library's version and name. | +| Others | `version.h` | Definition of the version number of the `common_util` library. | +| Others | `common_util.h` | The import header file for `common_util`, including all other header files. | ## 1.5. Testing Platforms @@ -156,7 +163,7 @@ Looking forward to your participation! 
common_util是C++的一个通用工具库,轻量级、操作简便。 -common_util采用现代C++语法(C++11及以上)实现,使用cutl作为命名空间,所有接口的命名方式与STL保持一致,可以作为STL库的一个补充。你可以像使用STL一样使用common_util,如: +common_util采用现代C++语法(C++11)实现,使用cutl作为命名空间,所有接口的命名方式与STL保持一致,可以作为STL库的一个补充。你可以像使用STL一样使用common_util,如: **用法示例:** @@ -187,35 +194,40 @@ current time(local time): 2025-05-02 18:29:10.785 - **多平台支持**: 支持Linux/macOS/Windows等多个平台,一套代码多平台编译。 - **代码整洁**: 代码风格整洁,命名方式与STL保持一致,你可以像使用STL一样使用本库。 - **文档规范全面**: 所有对外接口的头文件都有详细的代码注释,且有完整的API文档和使用Demo,可供查阅。 -- **C++版本要求**: C++11及以上版本。 +- **C++版本要求**: 本仓库采用C++11实现,C++11及以上的项目均支持。 ## 2.4. 模块说明 -| 头文件 | 主要功能 | -| :-------------- | :----------------------------------------------------------------------------------------------------- | -| `algoutil.h` | ``的补充,提供一些常用的算法函数,如:C++11没有,但是后面版本已加入的算法函数。 | -| `color.h` | ARGB颜色值的封装类。 | -| `common_util.h` | common_util的导入头文件,包含所有其他头文件。 | -| `config.h` | 提供库的初始化函数,提供库的版本和名称获取函数。 | -| `datetime.h` | 基于系统时钟的简易的日期时间类。 | -| `dlloader.h` | 动态库(共享库)的动态加载器。 | -| `eventloop.h` | 事件循环,支持:普通任务、定时任务(定时任务支持指定次数和取消),任务的执行分为单线程(`eventloop`)和多线程(`multithread_eventloop`)两个版本。 | -| `filepath.h` | 文件路径的操作,如路径拼接、文件类型判断等。 | -| `filetype.h` | 文件类型的定义。 | -| `fileutil.h` | 文件系统的操作,如:文件的创建/删除、文件的读/写等。 | -| `logtype.h` | Log记录相关的类型定义。 | -| `print.h` | 控制台打印的辅助函数,包括:打印基础类型的数组、vector、map等,带颜色的字符串打印。 | -| `singleton.h` | 单例模式的模板类和宏定义。 | -| `strfmt.h` | 字符串格式化,如时间格式化、文件大小格式化、int转十六进制等。 | -| `strutil.h` | 字符串操作的工具函数,如大小写转换、字符串拆分、字符串拼接等。 | -| `sysutil.h` | 系统工具函数,如系统调用、获取CPU的架构/大小端等。 | -| `threadpool.h` | 线程池,轻量级简单版本的线程池实现。 | -| `threadutil.h` | 线程相关的工具函数,如设置线程名称、获取线程ID等。 | -| `timecount.h` | 函数运行的使用时间计时器。 | -| `timer.h` | 定时器,支持:单次任务的定时器(延迟执行)、重复任务的定时器(周期执行)。 | -| `timeutil.h` | 时间处理的工具函数,如时间单位的转换、时间戳的获取等。 | -| `version.h` | common_util库的版本号定义。 | -| `verutil.h` | 版本号处理,如:从字符串中解析版本号子串、版本号比较等。 | +| 功能分类 | 头文件 | 主要功能 | +| :------------ | :-------------- | :----------------------------------------------------------------------------------------------------- | +| 文件操作 | `filepath.h` | 
文件路径的操作,如路径拼接、文件类型判断等。 | +| 文件操作 | `filetype.h` | 文件类型的定义。 | +| 文件操作 | `fileutil.h` | 文件系统的操作,如:文件的创建/删除、文件的读/写等。 | +| 字符串处理 | `strfmt.h` | 字符串格式化,如时间格式化、文件大小格式化、int转十六进制等。 | +| 字符串处理 | `strutil.h` | 字符串操作的工具函数,如大小写转换、字符串拆分、字符串拼接等。 | +| 字符串处理 | `verutil.h` | 版本号处理,如:从字符串中解析版本号子串、版本号比较等。 | +| 时间日期 | `datetime.h` | 基于系统时钟的简易的日期时间类。 | +| 时间日期 | `timecount.h` | 函数运行的使用时间计时器。 | +| 时间日期 | `timer.h` | 定时器,支持:单次任务的定时器(延迟执行)、重复任务的定时器(周期执行)。 | +| 时间日期 | `timeutil.h` | 时间处理的工具函数,如时间单位的转换、时间戳的获取等。 | +| 并发编程 | `threadpool.h` | 线程池,轻量级简单版本的线程池实现。 | +| 并发编程 | `threadutil.h` | 线程相关的工具函数,如设置线程名称、获取线程ID等。 | +| 并发编程 | `eventloop.h` | 事件循环,支持:普通任务、定时任务(定时任务支持指定次数和取消),任务的执行分为单线程(`eventloop`)和多线程(`multithread_eventloop`)两个版本。 | +| 系统工具 | `sysutil.h` | 系统工具函数,如系统调用、获取CPU的架构/大小端等。 | +| 系统工具 | `dlloader.h` | 动态库(共享库)的动态加载器。 | +| 常用算法 | `algoutil.h` | ``的补充,提供一些常用的算法函数,如:C++11没有,但是后面版本已加入的算法函数。 | +| 常用算法 | `lrucache.h` | 高性能LRU算法模板类,`get`和`put`的平均时间复杂度都是`O(1)`。 | +| 常用算法 | `hash.h` | 提供常用的哈希函数算法 | +| 常用算法 | `bitmap.h` | 高效的位图(Bitmap)数据结构类,并提供多个变种的子类型:dynamic_bitmap、roaring_bitmap等。 | +| 常用算法 | `bloomfilter.h` | 布隆过滤器算法 | +| 常用算法 | `hyperloglog.h` | HyperLogLog算法 | +| 日志与打印 | `print.h` | 控制台打印的辅助函数,包括:打印基础类型的数组、vector、map等,带颜色的字符串打印。 | +| 类型包装器 | `color.h` | ARGB颜色值的封装类。 | +| 辅助编程 | `singleton.h` | 单例模式的模板类和宏定义。 | +| 其他 | `logtype.h` | Log记录相关的类型定义。 | +| 其他 | `config.h` | 提供库的初始化函数,提供库的版本和名称获取函数。 | +| 其他 | `version.h` | common_util库的版本号定义。 | +| 其他 | `common_util.h` | common_util的导入头文件,包含所有其他头文件。 | ## 2.5. 
已验证的平台 diff --git a/include/common_util/common_util.h b/include/common_util/common_util.h index 01f9faa..2bafca3 100644 --- a/include/common_util/common_util.h +++ b/include/common_util/common_util.h @@ -20,20 +20,29 @@ #pragma once #include "algoutil.h" +#include "bitmap.h" +#include "bloomfilter.h" #include "color.h" #include "config.h" #include "datetime.h" +#include "dlloader.h" #include "eventloop.h" #include "filepath.h" +// #include "filetype.h" #include "fileutil.h" -#include "logtype.h" +#include "hash.h" +#include "hyperloglog.h" +// #include "logtype.h" +#include "lrucache.h" #include "print.h" #include "singleton.h" #include "strfmt.h" +#include "strutil.h" #include "sysutil.h" #include "threadpool.h" #include "threadutil.h" #include "timecount.h" #include "timer.h" #include "timeutil.h" -#include "verutil.h" +// #include "version.h" +#include "verutil.h" \ No newline at end of file -- Gitee From 8a892f3652e48ba9581f01e045238a0445671259 Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 23 Oct 2025 14:49:24 +0800 Subject: [PATCH 39/40] fix: hash.cpp --- src/common_util/hash.cpp | 40 +++++++++++++++++++------------------- src/common_util/strfmt.cpp | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/common_util/hash.cpp b/src/common_util/hash.cpp index 8a1403c..a069dec 100644 --- a/src/common_util/hash.cpp +++ b/src/common_util/hash.cpp @@ -130,42 +130,42 @@ uint32_t hash_lookup3(const void* key, size_t length, uint32_t initval) length -= 12; } - // 处理最后1-12个字节 + // 处理最后1-12个字节 - 使用注释代替fallthrough属性 switch (length) { case 12: c += (static_cast(k[11]) << 24); - [[fallthrough]]; + // fall through case 11: c += (static_cast(k[10]) << 16); - [[fallthrough]]; + // fall through case 10: c += (static_cast(k[9]) << 8); - [[fallthrough]]; + // fall through case 9: c += k[8]; - [[fallthrough]]; + // fall through case 8: b += (static_cast(k[7]) << 24); - [[fallthrough]]; + // fall through case 7: b += (static_cast(k[6]) << 16); - 
[[fallthrough]]; + // fall through case 6: b += (static_cast(k[5]) << 8); - [[fallthrough]]; + // fall through case 5: b += k[4]; - [[fallthrough]]; + // fall through case 4: a += (static_cast(k[3]) << 24); - [[fallthrough]]; + // fall through case 3: a += (static_cast(k[2]) << 16); - [[fallthrough]]; + // fall through case 2: a += (static_cast(k[1]) << 8); - [[fallthrough]]; + // fall through case 1: a += k[0]; break; @@ -226,10 +226,10 @@ uint32_t hash_murmur3_32(const void* key, size_t len, uint32_t seed) { case 3: k1 ^= tail[2] << 16; - [[fallthrough]]; + // fall through case 2: k1 ^= tail[1] << 8; - [[fallthrough]]; + // fall through case 1: k1 ^= tail[0]; k1 *= c1; @@ -279,22 +279,22 @@ uint64_t hash_murmur3_64(const void* key, size_t len, uint64_t seed) { case 7: h ^= static_cast(data[6]) << 48; - [[fallthrough]]; + // fall through case 6: h ^= static_cast(data[5]) << 40; - [[fallthrough]]; + // fall through case 5: h ^= static_cast(data[4]) << 32; - [[fallthrough]]; + // fall through case 4: h ^= static_cast(data[3]) << 24; - [[fallthrough]]; + // fall through case 3: h ^= static_cast(data[2]) << 16; - [[fallthrough]]; + // fall through case 2: h ^= static_cast(data[1]) << 8; - [[fallthrough]]; + // fall through case 1: h ^= static_cast(data[0]); h *= m; diff --git a/src/common_util/strfmt.cpp b/src/common_util/strfmt.cpp index 3ebf024..df47b45 100644 --- a/src/common_util/strfmt.cpp +++ b/src/common_util/strfmt.cpp @@ -1,4 +1,4 @@ -/** +/** * @copyright Copyright (c) 2024, Spencer.Luo. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); -- Gitee From 35e7bfe60d0f145de70993040f1efa60f3d57e8d Mon Sep 17 00:00:00 2001 From: Spencer Date: Thu, 23 Oct 2025 14:52:05 +0800 Subject: [PATCH 40/40] feat: change version to 1.4.0 --- CMakeLists.txt | 2 +- include/common_util/version.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e08f4ca..a3c2a6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ message(STATUS "--------- begin cmake ---------") # cmake version cmake_minimum_required(VERSION 3.28.3) # project name -project(common_util VERSION 1.3.0 LANGUAGES CXX) +project(common_util VERSION 1.4.0 LANGUAGES CXX) # 构建时间 # build time string(TIMESTAMP BUILD_TIME "%y-%m-%d %H:%M:%S") diff --git a/include/common_util/version.h b/include/common_util/version.h index abd12d2..ff33d45 100644 --- a/include/common_util/version.h +++ b/include/common_util/version.h @@ -28,4 +28,4 @@ * @brief Library version. * */ -#define CUTL_VERSION "1.3.0" +#define CUTL_VERSION "1.4.0" -- Gitee