diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..ab4f833923aea4394dc49cf3d25001e19e6b3391 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "binlogconvert/deps/googletest"] + path = binlogconvert/deps/googletest + url = https://github.com/google/googletest.git diff --git a/binlogconvert/AUTHORS b/binlogconvert/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..e89ccf6ab3296fa0cc798472ee1bc9eb2a89938e --- /dev/null +++ b/binlogconvert/AUTHORS @@ -0,0 +1,4 @@ +# This is the official list of loft authors for copyright purposes. + +Yincong Lyu +Weihao Li diff --git a/binlogconvert/CMakeLists.txt b/binlogconvert/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d0725d2d6e37696207d55cbe62d8809f2abf7cdd --- /dev/null +++ b/binlogconvert/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.16) + +# -- Configure project compiler options +project(sql2bl LANGUAGES C CXX) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS true) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) # -fPIC + +# -- CMake compile options +option(NDEBUG 1) # for debug +option(LOFT_TESTING "Build unit tests" YES) # for test + +# -- Manage Compile Options w/ ASAN flag +if(NDEBUG) + add_compile_options( + -O0 -Werror -Wno-attributes + -fvisibility=default + -DNDEBUG=1 + ) +else() + add_compile_options(-fsanitize=address) + add_link_options(-fsanitize=address) + add_compile_options( + -O0 -ggdb -Werror -Wno-attributes + -fvisibility=default + ) +endif() + +# -- Output directory +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +# -- Includes +include_directories(${PROJECT_SOURCE_DIR}/include) + +# -- third Libraries +add_subdirectory(deps) + +# Add subdirectory for source +add_subdirectory(src) 
+ +# Manage testing option +enable_testing() + +if(LOFT_TESTING) + add_subdirectory(test) +endif() diff --git a/binlogconvert/LICENSE b/binlogconvert/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f63f5a9cf3498818a73068495709cceed67efd6a --- /dev/null +++ b/binlogconvert/LICENSE @@ -0,0 +1,194 @@ +木兰宽松许可证,第2版 + +木兰宽松许可证,第2版 + +2020年1月 http://license.coscl.org.cn/MulanPSL2 + +您对“软件”的复制、使用、修改及分发受木兰宽松许可证,第2版(“本许可证”)的如下条款的约束: + +0. 定义 + +“软件” 是指由“贡献”构成的许可在“本许可证”下的程序和相关文档的集合。 + +“贡献” 是指由任一“贡献者”许可在“本许可证”下的受版权法保护的作品。 + +“贡献者” 是指将受版权法保护的作品许可在“本许可证”下的自然人或“法人实体”。 + +“法人实体” 是指提交贡献的机构及其“关联实体”。 + +“关联实体” 是指,对“本许可证”下的行为方而言,控制、受控制或与其共同受控制的机构,此处的控制是 +指有受控方或共同受控方至少50%直接或间接的投票权、资金或其他有价证券。 + +1. 授予版权许可 + +每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的版权许可,您可 +以复制、使用、修改、分发其“贡献”,不论修改与否。 + +2. 授予专利许可 + +每个“贡献者”根据“本许可证”授予您永久性的、全球性的、免费的、非独占的、不可撤销的(根据本条规定 +撤销除外)专利许可,供您制造、委托制造、使用、许诺销售、销售、进口其“贡献”或以其他方式转移其“贡 +献”。前述专利许可仅限于“贡献者”现在或将来拥有或控制的其“贡献”本身或其“贡献”与许可“贡献”时的“软 +件”结合而将必然会侵犯的专利权利要求,不包括对“贡献”的修改或包含“贡献”的其他结合。如果您或您的“ +关联实体”直接或间接地,就“软件”或其中的“贡献”对任何人发起专利侵权诉讼(包括反诉或交叉诉讼)或 +其他专利维权行动,指控其侵犯专利权,则“本许可证”授予您对“软件”的专利许可自您提起诉讼或发起维权 +行动之日终止。 + +3. 无商标许可 + +“本许可证”不提供对“贡献者”的商品名称、商标、服务标志或产品名称的商标许可,但您为满足第4条规定 +的声明义务而必须使用除外。 + +4. 分发限制 + +您可以在任何媒介中将“软件”以源程序形式或可执行形式重新分发,不论修改与否,但您必须向接收者提供“ +本许可证”的副本,并保留“软件”中的版权、商标、专利及免责声明。 + +5. 免责声明与责任限制 + +“软件”及其中的“贡献”在提供时不带任何明示或默示的担保。在任何情况下,“贡献者”或版权所有者不对 +任何人因使用“软件”或其中的“贡献”而引发的任何直接或间接损失承担责任,不论因何种原因导致或者基于 +何种法律理论,即使其曾被建议有此种损失的可能性。 + +6. 语言 + +“本许可证”以中英文双语表述,中英文版本具有同等法律效力。如果中英文版本存在任何冲突不一致,以中文 +版为准。 + +条款结束 + +如何将木兰宽松许可证,第2版,应用到您的软件 + +如果您希望将木兰宽松许可证,第2版,应用到您的新软件,为了方便接收者查阅,建议您完成如下三步: + +1, 请您补充如下声明中的空白,包括软件名、软件的首次发表年份以及您作为版权人的名字; + +2, 请您在软件包的一级目录下创建以“LICENSE”为名的文件,将整个许可证文本放入该文件中; + +3, 请将如下声明文本放入每个源文件的头部注释中。 + +Copyright (c) [Year] [name of copyright holder] +[Software Name] is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan +PSL v2. 
+You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. + +Mulan Permissive Software License,Version 2 + +Mulan Permissive Software License,Version 2 (Mulan PSL v2) + +January 2020 http://license.coscl.org.cn/MulanPSL2 + +Your reproduction, use, modification and distribution of the Software shall +be subject to Mulan PSL v2 (this License) with the following terms and +conditions: + +0. Definition + +Software means the program and related documents which are licensed under +this License and comprise all Contribution(s). + +Contribution means the copyrightable work licensed by a particular +Contributor under this License. + +Contributor means the Individual or Legal Entity who licenses its +copyrightable work under this License. + +Legal Entity means the entity making a Contribution and all its +Affiliates. + +Affiliates means entities that control, are controlled by, or are under +common control with the acting entity under this License, ‘control’ means +direct or indirect ownership of at least fifty percent (50%) of the voting +power, capital or other securities of controlled or commonly controlled +entity. + +1. Grant of Copyright License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to you a perpetual, worldwide, royalty-free, non-exclusive, +irrevocable copyright license to reproduce, use, modify, or distribute its +Contribution, with modification or not. + +2. 
Grant of Patent License + +Subject to the terms and conditions of this License, each Contributor hereby +grants to you a perpetual, worldwide, royalty-free, non-exclusive, +irrevocable (except for revocation under this Section) patent license to +make, have made, use, offer for sale, sell, import or otherwise transfer its +Contribution, where such patent license is only limited to the patent claims +owned or controlled by such Contributor now or in future which will be +necessarily infringed by its Contribution alone, or by combination of the +Contribution with the Software to which the Contribution was contributed. +The patent license shall not apply to any modification of the Contribution, +and any other combination which includes the Contribution. If you or your +Affiliates directly or indirectly institute patent litigation (including a +cross claim or counterclaim in a litigation) or other patent enforcement +activities against any individual or entity by alleging that the Software or +any Contribution in it infringes patents, then any patent license granted to +you under this License for the Software shall terminate as of the date such +litigation or activity is filed or taken. + +3. No Trademark License + +No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements in section 4. + +4. Distribution Restriction + +You may distribute the Software in any medium with or without modification, +whether in source or executable forms, provided that you provide recipients +with a copy of this License and retain copyright, patent, trademark and +disclaimer statements in the Software. + +5. Disclaimer of Warranty and Limitation of Liability + +THE SOFTWARE AND CONTRIBUTION IN IT ARE PROVIDED WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED. 
IN NO EVENT SHALL ANY CONTRIBUTOR OR +COPYRIGHT HOLDER BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT +LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING +FROM YOUR USE OR INABILITY TO USE THE SOFTWARE OR THE CONTRIBUTION IN IT, NO +MATTER HOW IT’S CAUSED OR BASED ON WHICH LEGAL THEORY, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. + +6. Language + +THIS LICENSE IS WRITTEN IN BOTH CHINESE AND ENGLISH, AND THE CHINESE VERSION +AND ENGLISH VERSION SHALL HAVE THE SAME LEGAL EFFECT. IN THE CASE OF +DIVERGENCE BETWEEN THE CHINESE AND ENGLISH VERSIONS, THE CHINESE VERSION +SHALL PREVAIL. + +END OF THE TERMS AND CONDITIONS + +How to Apply the Mulan Permissive Software License,Version 2 +(Mulan PSL v2) to Your Software + +To apply the Mulan PSL v2 to your work, for easy identification by +recipients, you are suggested to complete following three steps: + +i. Fill in the blanks in following statement, including insert your software +name, the year of the first publication of your software, and your name +identified as the copyright owner; + +ii. Create a file named "LICENSE" which contains the whole context of this +License in the first directory of your software package; + +iii. Attach the statement to the appropriate annotated syntax at the +beginning of each source file. + +Copyright (c) [Year] [name of copyright holder] +[Software Name] is licensed under Mulan PSL v2. +You can use this software according to the terms and conditions of the Mulan +PSL v2. +You may obtain a copy of Mulan PSL v2 at: + http://license.coscl.org.cn/MulanPSL2 +THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +See the Mulan PSL v2 for more details. 
diff --git a/binlogconvert/README.md b/binlogconvert/README.md new file mode 100644 index 0000000000000000000000000000000000000000..37c8af0adbab51d50aaa0e896aa955e4ed602fb9 --- /dev/null +++ b/binlogconvert/README.md @@ -0,0 +1,125 @@ +# 一、Build +1. 下载 FlatBuffers 库 +```bash +git clone https://github.com/google/flatbuffers.git + +cd flatbuffers +mkdir build && cd build +cmake .. +make -j$(nproc) +make install +``` +2. 编译项目得到 so 文件 +```bash +# 1. Clone the repository +git clone git@gitee.com:coonger/cantian-connector-mysql.git +git checkout cantian24.12 +cd binlogconvert +git submodule update --init --recursive # Make sure the google test framework is downloaded + +# 2. The default compilation parameters are not to enable debug and test mode +./build.sh + +# 3. libsql2bl.so in the ./build/lib directory +find ./build/lib/ -name '*.so' + +# 4. Confirm that the so dynamic library file can expose the ELF symbol table +readelf -s build/lib/libsql2bl.so | grep SetBinlogPath +readelf -s build/lib/libsql2bl.so | grep ConvertFlatBufferToBinlog +readelf -s build/lib/libsql2bl.so | grep GetLastScnAndSeq +``` +# 二、测试 +## 2.1 单元测试 +```bash +# 1. 目录下./build.sh 编译脚本,修改编译选项 -DLOFT_TESTING=YES + +# 2. 查看测试结果 +./build/test/event_test # --> event 级别测试,构造 DDL | DML 的 binlog 文件内的 event 存储格式正确 +mysqlbinlog -vv --base64-output=decode-rows --hexdump "event_file_name" + +./build/test/fbs_test +# DDL_TEST, DML_TEST # --> 读取 flatbuffer 内容无误 +# SQL_TEST # --> 转换一条 DDL | DML,连续转换多条 DDL | DML,并通过 mysqlbinlog 回放工具证明无误 +mysqlbinlog "binlog_file_name" | mysql -u -p -P 3306 -h +``` +## 2.2 集成测试 +```bash +cd bin +g++ main.cpp -o test -std=c++17 -pthread -ldl +./test +``` +# 三、Main API 介绍 +1. 
设置 binlog 文件写入的目录路径 +```c++ +@param[in] bashPathBytes 目录路径的字符数组 +@param[in] length bashPathBytes 的长度 +@param[in] maxSize binlog 文件的大小 +@param[in] threadNum 转换工作的线程数 +@param[in] capacity binlog 文件的总大小 +@param[in] expirationTime binlog 文件的过期时间 +RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, int expirationTime); +``` +2. 把 FlatBuffer 日志格式 转换成 binlog 日志格式 +```c++ +@param[in] fbStr 待转换的 flatbuffer 二进制数据 +@param[in] length fbStr 的长度 +@param[in] is_ddl 是否是 DDL 语句, true 表示 DDL, false 表示 DML +std::future ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl); +``` +3. 获取转换的进度 checkpoint +```c++ +@param[out] scn scn +@param[out] seq seq +@param[out] ckp checkpoint 记录在 controlinfo 文件中的第一行, 格式为 trxSeq-seq-scn +RC GetLastScnAndSeq(long *scn, long *seq, char **ckp); +``` +# 四、How To Use +1. 使用类对象的方式可参考单元测试代码 test/log_file_test.cpp +```c++ + // 1. 创建一个 LogFileManager 对象 + auto logFileManager = std::make_unique(); + + // 2. 设置写入binlog文件的目录,binlog文件的前缀名,默认是'teledb-bin', binlog文件的大小,默认是 20MB + // "/home/yincong/collectBin" 的字符数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 20, 3600); + + // 3. 异步投放任务 + std::vector> futures; // 存储所有的future + for (auto& buf: buffers) { // 待转换的 中间数据 + // true 表示 ddl,false 表示 dml + futures.push_back(logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, true)); + futures.push_back(logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, false)); + } + + for (auto& future : futures) { + RC result = future.get(); + if (result != RC::SUCCESS) { + LOG_ERROR("Transform task failed"); + } + } + + // 4. 查询转换进度 + long scn = 0; + long seq = 0; + std::string ckp = ""; + GetLastScnAndSeq(scn, seq, ckp); + + // 5. 程序退出,自动析构所有资源对象 +``` +2. 
/**
 * @brief Forward-only, bounds-checked cursor over a caller-owned byte buffer.
 *
 * The reader stores only pointers: it never copies or frees the buffer, so the
 * caller must keep the buffer alive while the reader is in use. Integers are
 * decoded as little-endian regardless of the host byte order.
 */
class BufferReader {
public:
    BufferReader(const char *buffer, unsigned long long length) noexcept
        : buffer_(buffer), ptr_(buffer), limit_(length) {}
    ~BufferReader() = default;

    /**
     * @brief Decode one little-endian integer and advance the cursor.
     * @tparam T    Integral result type (checked at compile time).
     * @param bytes Number of encoded bytes to consume, sizeof(T) by default.
     *              When fewer than sizeof(T), the missing high bytes are zero.
     * @return The decoded value.
     * @throws std::invalid_argument if bytes is larger than sizeof(T).
     * @throws std::out_of_range if fewer than bytes bytes remain.
     */
    template <typename T>
    T read(unsigned char bytes = sizeof(T)) {
        if (bytes > sizeof(T)) {
            // Copying more than sizeof(T) bytes would overrun the local value.
            throw std::invalid_argument("Requested width exceeds the size of the target type");
        }
        if (bytes > remaining()) {
            throw std::out_of_range("Attempt to read beyond buffer limit");
        }
        T value = 0;
        std::memcpy(&value, ptr_, bytes);
        ptr_ += bytes;
        // The object representation now holds the encoded bytes in stream
        // order; letoh turns that into the host value.
        return letoh(value);
    }

    /**
     * @brief Copy the next length bytes into destination and advance.
     * @param destination Pointer-like target with room for length bytes.
     * @param length      Number of bytes to copy.
     * @throws std::out_of_range if fewer than length bytes remain.
     */
    template <typename T>
    void memcpy(T destination, size_t length) {
        if (length > remaining()) {
            throw std::out_of_range("Attempt to copy beyond buffer limit");
        }
        if (length != 0) {
            std::memcpy(destination, ptr_, length);
        }
        ptr_ += length;
    }

    /**
     * @brief Skip length bytes without reading them.
     * @throws std::out_of_range if fewer than length bytes remain.
     */
    void forward(size_t length) {
        // Compare against the remaining byte count instead of forming
        // ptr_ + length, which would overflow for very large lengths.
        if (length > remaining()) {
            throw std::out_of_range("Attempt to forward beyond buffer limit");
        }
        ptr_ += length;
    }

    /** @brief Number of bytes consumed so far. */
    unsigned long long position() const noexcept {
        return ptr_ >= buffer_ ? static_cast<unsigned long long>(ptr_ - buffer_) : limit_;
    }

    /** @brief True while at least one unread byte remains. */
    bool valid() const noexcept {
        return remaining() > 0;
    }

private:
    /** @brief Number of unread bytes. */
    unsigned long long remaining() const noexcept {
        return limit_ - position();
    }

    /**
     * @brief Interpret the object representation of value as little-endian.
     *
     * Works byte by byte, so it needs no platform endian header and accepts
     * every integral type up to 64 bits.
     */
    template <typename T>
    static T letoh(T value) {
        static_assert(std::is_integral<T>::value, "letoh only supports integral types");
        static_assert(sizeof(T) <= sizeof(unsigned long long), "letoh supports at most 64-bit types");
        unsigned char raw[sizeof(T)];
        std::memcpy(raw, &value, sizeof(T));
        unsigned long long decoded = 0;
        for (size_t i = 0; i < sizeof(T); ++i) {
            decoded |= static_cast<unsigned long long>(raw[i]) << (8 * i);
        }
        return static_cast<T>(decoded);
    }

private:
    const char *buffer_;        // start of the caller-owned buffer
    const char *ptr_;           // next unread byte
    unsigned long long limit_;  // total buffer length in bytes
};
/**
 * @brief Small helper that loads a redo-log test file into memory.
 *
 * open()/close() manage a POSIX file descriptor; readFromFile() is independent
 * of that descriptor and reads through C stdio.
 */
class RedoLogFileReader {
public:
    RedoLogFileReader() = default;

    ~RedoLogFileReader() {
        close();
    }

    // The object owns a file descriptor; copying it would close the fd twice.
    RedoLogFileReader(const RedoLogFileReader &) = delete;
    RedoLogFileReader &operator=(const RedoLogFileReader &) = delete;

    /**
     * @brief Open filename read-only, replacing any descriptor already held.
     * @return 0 on success, 1 if the file could not be opened.
     */
    auto open(const char *filename) -> int {
        close();  // do not leak a descriptor from an earlier open()
        filename_ = filename;
        fd_ = ::open(filename, O_RDONLY);
        return fd_ < 0 ? 1 : 0;
    }

    /**
     * @brief Close the descriptor held by this object.
     * @return 0 if a descriptor was closed, 1 if none was open.
     */
    auto close() -> int {
        if (fd_ < 0) {
            return 1;
        }
        ::close(fd_);
        fd_ = -1;
        return 0;
    }

    /**
     * @brief Read the whole file into a heap buffer.
     * @param fileName Path of the file to read.
     * @return {buffer, size}. The buffer holds size bytes followed by a '\0'
     *         terminator. On open or read failure returns {nullptr, 0}; an
     *         empty file yields a non-null buffer with size 0.
     */
    auto readFromFile(const std::string &fileName) -> std::pair<std::unique_ptr<char[]>, size_t> {
        std::unique_ptr<FILE, FileCloser> file(fopen(fileName.c_str(), "rb"));
        if (!file) {
            std::cerr << "Failed to open file " << fileName << std::endl;
            return {nullptr, 0};
        }

        constexpr size_t kChunkSize = 4096;  // bytes requested per fread
        size_t capacity = kChunkSize * 2;
        size_t used = 0;
        std::unique_ptr<char[]> data = std::make_unique<char[]>(capacity);

        for (;;) {
            // Keep room for one full chunk plus the trailing '\0'.
            if (capacity - used <= kChunkSize) {
                const size_t grown = capacity * 2;
                std::unique_ptr<char[]> bigger = std::make_unique<char[]>(grown);
                memcpy(bigger.get(), data.get(), used);
                data = std::move(bigger);
                capacity = grown;
            }

            // Read straight into the result buffer; no intermediate copy.
            const size_t got = fread(data.get() + used, 1, kChunkSize, file.get());
            used += got;
            if (got < kChunkSize) {
                if (ferror(file.get())) {
                    std::cerr << "Failed to read data from " << fileName << std::endl;
                    return {nullptr, 0};
                }
                break;  // short read without error means end of file
            }
        }

        data[used] = '\0';
        return {std::move(data), used};
    }

private:
    // Deleter so the FILE handle is closed on every return path.
    struct FileCloser {
        void operator()(FILE *f) const noexcept {
            if (f != nullptr) {
                fclose(f);
            }
        }
    };

    int fd_ = -1;           // descriptor owned by open()/close(), -1 when closed
    std::string filename_;  // path passed to the last open() call
};
readSize, buffer, oneRead); + readSize += oneRead; + } + + fclose(file); + + // 调整最终大小,使其准确匹配已读取的数据量 + std::unique_ptr result = std::make_unique(readSize + 1); + memcpy(result.get(), data.get(), readSize); + result[readSize] = '\0'; + + return {std::move(result), readSize}; + } + +private: + int fd_ = -1; + std::string filename_; +}; + +const int MAX_SQL_LENGTH = 1024 * 1024; // 最大 flatbuffer 的大小为 1MB + +int main() { + + // 打开共享库 + void *handle = dlopen("../build/lib/libsql2bl.so", RTLD_LAZY); + if (!handle) { + fprintf(stderr, "Cannot load library: %s\n", dlerror()); + return 1; + } + // 找到 so 文件中的函数符号表 + typedef int (*SetBinlogPath_func)(char *, int, long, int, long, int); + SetBinlogPath_func SetBinlogPath = (SetBinlogPath_func)dlsym(handle, "SetBinlogPath"); + + typedef int (*GetLastScnAndSeq_func)(long*, long*, char **); + GetLastScnAndSeq_func GetLastScnAndSeq = (GetLastScnAndSeq_func)dlsym(handle, "GetLastScnAndSeq"); + + typedef int (*ConvertFlatBufferToBinlog_func)(char *, int, bool); + ConvertFlatBufferToBinlog_func ConvertFlatBufferToBinlog = (ConvertFlatBufferToBinlog_func)dlsym(handle, "ConvertFlatBufferToBinlog"); + + /////////////// 调用 API1 //////////////////// + // 1.1 在程序执行过程中,首次调用会初始化全局对象 logFileManager + char byteArray[] = { + 0x2F, 0x68, 0x6F, 0x6D, 0x65, 0x2F, 0x79, 0x69, 0x6E, 0x63, 0x6F, 0x6E, + 0x67, 0x2F, 0x44, 0x45, 0x56, 0x5F, 0x6C, 0x6F, 0x66, 0x74, 0x2F, 0x62, + 0x69, 0x6E, 0x6C, 0x6F, 0x67, 0x63, 0x6F, 0x6E, 0x76, 0x65, 0x72, 0x74, + 0x2F, 0x63, 0x6F, 0x6C, 0x6C, 0x65, 0x63, 0x74, 0x42, 0x69, 0x6E, 0x00 + }; + + long file_size = 1024 * 1024 * 50; + SetBinlogPath(byteArray, strlen(byteArray), file_size, 4, file_size * 100, 3600); + + /////////////// 准备测试数据 start //////////////////// + auto fileReader = std::make_unique(); + auto readFileStartTime = std::chrono::high_resolution_clock::now(); // 记录开始时间 + + std::string filename = "/home/yincong/DEV_loft/binlogconvert/test/data"; + fileReader->open(filename.c_str()); + auto [data, fileSize] 
= fileReader->readFromFile(filename); + auto bufferReader = std::make_unique(data.get(), fileSize); + + auto readFileEndTime = std::chrono::high_resolution_clock::now(); // 记录文件读取结束时间 + auto duration = std::chrono::duration_cast(readFileEndTime - readFileStartTime).count(); + std::cout << "read file time: " << duration << " ms" << std::endl; + + /////////////// 准备测试数据 end //////////////////// + + ////////////////// 模拟调用 API2 /////////////////////////// + std::vector futures; + int sql_cnt = 0; + bool is_ddl; + while (bufferReader->valid()) + { + sql_cnt++; + if (sql_cnt >= 9) break; + auto sql_len = bufferReader->read(); + if (sql_len == 0 || sql_len > MAX_SQL_LENGTH) { + continue; + } + std::unique_ptr buf = std::make_unique(sql_len); + bufferReader->memcpy(buf.get(), sql_len); + if (sql_cnt <= 2) is_ddl = true; + else is_ddl = false; + futures.emplace_back(ConvertFlatBufferToBinlog(buf.get(), sql_len, is_ddl)); + } + + ////////////////// 模拟调用 API3 /////////////////////////// + long scn = 0; + long seq = 0; + char *ckp = nullptr; + GetLastScnAndSeq(&scn, &seq, &ckp); + std::cout << " ckp: " << ckp << std::endl; + + dlclose(handle); + return 0; +} \ No newline at end of file diff --git a/binlogconvert/build.sh b/binlogconvert/build.sh new file mode 100755 index 0000000000000000000000000000000000000000..7dcbbaa1c476574a904ee915e3605fa723bfdd4e --- /dev/null +++ b/binlogconvert/build.sh @@ -0,0 +1,4 @@ +rm -rf build +mkdir build && cd build +cmake .. 
-DNDEBUG=1 -DLOFT_TESTING=NO +make -j$(nproc) \ No newline at end of file diff --git a/binlogconvert/ddl.fbs b/binlogconvert/ddl.fbs new file mode 100644 index 0000000000000000000000000000000000000000..4a8f0092c9fa01a2b5aea6af0b16fc17fe45839a --- /dev/null +++ b/binlogconvert/ddl.fbs @@ -0,0 +1,19 @@ +namespace loft; + +table DDL { // 定义表结构 + check_point: string; // checkpoint + db_name: string; // which 数据库名 + ddl_sql: string; // 具体 SQL 语句 + ddl_type: string; // ddl 类型 + last_commit: long; // 当前系统中 事务提交进度 + lsn: long; // lsn 序列号 + msg_time: string; // UCT 时间戳 + op_type: string; // {'DDL','I','U','D'} + scn: long; // 全局逻辑事务号 + seq: long; // 当前 redo log record 页面编号 + table_: string; // 表名 + tx_seq: long; // 当前事务号 + tx_time: string; // 事务开始时间 +} + +root_type DDL; // 根类型 \ No newline at end of file diff --git a/binlogconvert/deps/CMakeLists.txt b/binlogconvert/deps/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf4e31a52ebe343800d08e11a530d09690c2177f --- /dev/null +++ b/binlogconvert/deps/CMakeLists.txt @@ -0,0 +1,3 @@ +# 自动检测项目中的 test 目录下的测试文件 +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +add_subdirectory(googletest) \ No newline at end of file diff --git a/binlogconvert/deps/googletest b/binlogconvert/deps/googletest new file mode 160000 index 0000000000000000000000000000000000000000..a6ce08abf746c0aaa577520d6d1f6ea2abeeb61d --- /dev/null +++ b/binlogconvert/deps/googletest @@ -0,0 +1 @@ +Subproject commit a6ce08abf746c0aaa577520d6d1f6ea2abeeb61d diff --git a/binlogconvert/dml.fbs b/binlogconvert/dml.fbs new file mode 100644 index 0000000000000000000000000000000000000000..92f449733c2553129bdc40b59c37f677a425ca10 --- /dev/null +++ b/binlogconvert/dml.fbs @@ -0,0 +1,73 @@ +namespace loft; +//========================================================= +table FieldMeta { + data_type: string; + length: int; + precision: int; + is_unsigned: bool = false; + nullable: bool = false; + csname: string; // constrant: 需要判断 null +} + 
+table Field { // 定义 table schema + name: string; + meta: FieldMeta; +} +//========================================================= + +enum DataType : byte { + None = 0, + LongData = 1, + DoubleData = 2, + StringData = 3 +} + +table LongVal { + value: long; +} + +table DoubleVal { + value: double; +} + +table StringVal { + value: string; +} +// union 里的类型必须是表/向量/字符串,不能是基础类型 +union DataMeta { + // NoneVal, 暂时不写,让 union 自动生成的 None 代替 NoneVal + LongVal, + DoubleVal, + StringVal +} + +// newData 只存储 k-v +table kvPair { + key: string; + value: DataMeta; +} + +table DML { // 定义表结构 + check_point: string; // checkpoint + db_name: string; // which 数据库名 + dn: short; // machine 节点编号 + + fields: [Field]; // <- table fields 的元信息,create table 就固定下来,insert / update / delete 都用它 + keys: [kvPair]; // <- update / delete 会用到,指定操作的 索引 + + last_commit: long; // 当前系统中 事务提交进度 + lsn: long; // lsn 序列号 + msg_time: string; // UCT 时间戳 + + new_data: [kvPair]; // <- insert / update 会用到 + old_data: int = 0; // <- insert / update / delete 都没有使用 old_data? 
+ + op_type: string; // {'DDL','I','U','D'} + scn: long; // 全局逻辑事务号 + seq: long; // 当前 redo log record 页面编号 + table_: string; // 表名 + tx_seq: long; // 当前事务号 + tx_time: string; // 事务开始时间 +} + +root_type DML; // 根类型 \ No newline at end of file diff --git a/binlogconvert/include/basic_ostream.h b/binlogconvert/include/basic_ostream.h new file mode 100644 index 0000000000000000000000000000000000000000..3dac7cd5ea02919c559abadb35ff1e78455979e7 --- /dev/null +++ b/binlogconvert/include/basic_ostream.h @@ -0,0 +1,74 @@ +// refer from: sql/basic_ostream.h +#pragma once + +#include +#include +#include + +#include "common/logging.h" +#include "common/rc.h" +#include "common/type_def.h" +/** + Basic_ostream 抽象类提供 write(), seek(), sync(), flush() + 接口,用于写入数据到 buffer 中 +*/ +class Basic_ostream { +public: + virtual ~Basic_ostream() = default; + + // Write data to buffer, return true on success, false on failure + virtual bool write(const uchar *buffer, my_off_t length) = 0; + virtual RC seek(my_off_t position) = 0; + virtual RC sync() = 0; + virtual RC flush() = 0; + virtual my_off_t get_position() = 0; +}; + +/** + * 专门写 binlog 文件的流对象 + */ +class Binlog_ofile : public Basic_ostream { +public: + Binlog_ofile(const char *binlog_name, RC &rc); + ~Binlog_ofile() override = default; + + bool write(const uchar *buffer, my_off_t length) override; + RC seek(my_off_t position) override; + RC sync() override; + RC flush() override; + + // Helper functions + my_off_t get_position() override { return m_position_; }; + + bool is_empty() const { return m_position_ == 0; } + + bool is_open() const { return m_pipeline_head_ != nullptr; } + + bool open(const char *binlog_name) + { + std::unique_ptr file_ostream = std::make_unique( + binlog_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::app); + if (!file_ostream->is_open()) { + return false; + } + // 移动到文件末尾 + file_ostream->seekp(0, std::ios::end); + m_position_ = file_ostream->tellp(); + m_pipeline_head_ = 
std::move(file_ostream); + return true; + } + + void close() + { + if (m_pipeline_head_) { + LOG_INFO(" binlog ostream exit...."); + m_pipeline_head_->close(); + m_pipeline_head_.reset(); + m_position_ = 0; + } + } + +private: + my_off_t m_position_; + std::unique_ptr m_pipeline_head_; +}; diff --git a/binlogconvert/include/binlog.h b/binlogconvert/include/binlog.h new file mode 100644 index 0000000000000000000000000000000000000000..d1df51f2cc56256453fd830173e040d52778c67f --- /dev/null +++ b/binlogconvert/include/binlog.h @@ -0,0 +1,75 @@ +#pragma once + +#include + +#include "basic_ostream.h" +#include "common/init_setting.h" +#include "common/logging.h" +#include "common/rc.h" +#include "events/abstract_event.h" +#include "events/control_events.h" + +/** + Transaction Coordinator Log. + + 提供三种实现: + 1. one using an in-memory structure, + 2. one dummy that does not do anything 不保证事务,只写 log 到 file + 3. one using the binary log for transaction coordination. [only impl it] +*/ +class TC_LOG { +public: + TC_LOG() = default; + virtual ~TC_LOG() = default; + + enum enum_result { RESULT_SUCCESS, RESULT_ABORTED, RESULT_INCONSISTENT }; + + virtual RC open() = 0; + virtual RC close() = 0; +}; + +// 暂时不考虑 index 文件、lock +class MYSQL_BIN_LOG : TC_LOG { +public: + MYSQL_BIN_LOG(const char *file_name, uint64_t file_size, RC &rc); + ~MYSQL_BIN_LOG() override = default; + + //********************* common file operation ************************* + RC open() override; // 构造函数 + RC close() override; + + void flush() { m_binlog_file_->flush(); } + + //********************* file write operation ************************* + bool write(const uchar *buffer, my_off_t length) + { + return m_binlog_file_->write(buffer, length); + } + bool write_event_to_binlog(AbstractEvent *ev); + + bool remain_bytes_safe(uint32 event_len) + { + return m_binlog_file_->get_position() + event_len + WRITE_THRESHOLD < max_size_; + } + uint64 get_bytes_written() { return m_binlog_file_->get_position(); } + + void 
/**
 * @brief Forward-only, bounds-checked cursor over a caller-owned byte buffer.
 *
 * The reader stores only pointers and never frees the buffer. Integers are
 * decoded as little-endian regardless of the host byte order.
 */
class BufferReader {
public:
    BufferReader(const char *buffer, unsigned long long length) noexcept;
    ~BufferReader() = default;

    /**
     * @brief Decode one little-endian integer and advance the cursor.
     * @tparam T    Integral result type (checked at compile time).
     * @param bytes Number of encoded bytes to consume, sizeof(T) by default.
     *              When fewer than sizeof(T), the missing high bytes are zero.
     * @return The decoded value.
     * @throws std::invalid_argument if bytes is larger than sizeof(T).
     * @throws std::out_of_range if fewer than bytes bytes remain.
     */
    template <typename T> T read(unsigned char bytes = sizeof(T));

    /**
     * @brief Copy the next length bytes into destination and advance.
     * @throws std::out_of_range if fewer than length bytes remain.
     */
    template <typename T> void memcpy(T destination, size_t length);

    /**
     * @brief Advance the cursor by length bytes.
     * @param length Number of bytes to skip.
     */
    void forward(size_t length);

    unsigned long long position() const noexcept;
    bool valid() const noexcept;

private:
    /**
     * @brief Interpret the object representation of value as little-endian.
     */
    template <typename T> static T letoh(T value);

    /** @brief Number of unread bytes. */
    unsigned long long remaining() const noexcept
    {
        return limit_ - static_cast<unsigned long long>(ptr_ - buffer_);
    }

private:
    const char *buffer_;        // start of the caller-owned buffer
    const char *ptr_;           // next unread byte
    unsigned long long limit_;  // total buffer length in bytes
};

template <typename T> T BufferReader::read(unsigned char bytes)
{
    if (bytes > sizeof(T)) {
        // Copying more than sizeof(T) bytes would overrun the local value.
        throw std::invalid_argument("Requested width exceeds the size of the target type");
    }
    // Compare against the remaining byte count instead of forming
    // ptr_ + bytes, which can overflow.
    if (bytes > remaining()) {
        throw std::out_of_range("Attempt to read beyond buffer limit");
    }
    T value = 0;
    std::memcpy(&value, ptr_, bytes);
    ptr_ += bytes;
    return letoh(value);
}

template <typename T> void BufferReader::memcpy(T destination, size_t length)
{
    if (length > remaining()) {
        throw std::out_of_range("Attempt to copy beyond buffer limit");
    }
    if (length != 0) {
        std::memcpy(destination, ptr_, length);
    }
    ptr_ += length;
}

template <typename T> T BufferReader::letoh(T value)
{
    static_assert(std::is_integral<T>::value, "letoh only supports integral types");
    static_assert(sizeof(T) <= sizeof(unsigned long long), "letoh supports at most 64-bit types");
    // Byte-wise decoding needs no platform endian header and handles every
    // integral width, including types that are not fixed-width typedefs.
    unsigned char raw[sizeof(T)];
    std::memcpy(raw, &value, sizeof(T));
    unsigned long long decoded = 0;
    for (size_t i = 0; i < sizeof(T); ++i) {
        decoded |= static_cast<unsigned long long>(raw[i]) << (8 * i);
    }
    return static_cast<T>(decoded);
}
EMPTY_DB_INDICATOR 254 +#define TS_MICROSECOND_PART 1000000 + +#define DML_QUERY_STR "BEGIN" + +// *** format description event **** +#define MYSQL_BINLOG_VERSION 4 + +// *** rows event **** +#define FIELD_METADATA_SIZE 4 +#define MAX_METADATA_SIZE 251 +#define BIT_PER_BYTE 8 + +// *** write event **** +#define FRAC_DIGITS 99999999 +#define INT_DIGITS 1000000000 +#define MAX_PRECISION 9 + +// IO size 一般规定为 4KB +constexpr const size_t IO_SIZE{4096}; diff --git a/binlogconvert/include/common/logging.h b/binlogconvert/include/common/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..f6046af07a02f9fb9f4251efe46d6dfcc41235d9 --- /dev/null +++ b/binlogconvert/include/common/logging.h @@ -0,0 +1,79 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#define LOG_FILE_PATH "loft_log.txt" +#define LOG_LOG_TIME_FORMAT "%Y-%m-%d %H:%M:%S" + +#define GET_TIME \ + time_t t = ::time(nullptr); \ + tm *curTime = localtime(&t); \ + char time_str[32]; \ + ::strftime(time_str, 32, LOG_LOG_TIME_FORMAT, curTime); + +#define TIME time_str + +#define DEBUG(format, ...) printf(format, ##__VA_ARGS__) + +#define SHORT_FILE (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) : __FILE__) + +// 定义日志级别 + +#define LOG_LEVEL_OFF (0) +#define LOG_LEVEL_FATAL (1) +#define LOG_LEVEL_ERROR (2) +#define LOG_LEVEL_INFO (100) +#define LOG_LEVEL_DEBUG (4) + +#define level LOG_LEVEL_DEBUG + +#if level >= LOG_LEVEL_FATAL +#define LOG_FATAL(format, ...) \ + do { \ + GET_TIME \ + DEBUG("\033[;31m[FATAL] %s %s:%d: " format "\n\033[0m", TIME, __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + fflush(stdout); \ + abort(); \ + } while (0) +#else +#define LOG_FATAL(format, ...) +#endif + +#if level >= LOG_LEVEL_ERROR +#define LOG_ERROR(format, ...) \ + do { \ + GET_TIME \ + DEBUG("\033[;31m[ERROR] %s %s:%d: " format "\n\033[0m", TIME, __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + } while (0) +#else +#define LOG_ERROR(format, ...) 
#include <cassert>   // assert, used by LOFT_ASSERT
#include <exception> // std::terminate, used by LOFT_VERIFY

// Branch-prediction hints. `!!` collapses the argument to 0/1 first, so the
// macros accept pointers and any other contextually-convertible expression
// (__builtin_expect itself only takes a long).
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

// Macros to disable copying and moving. Place them inside the class body.
#define DISALLOW_COPY(cname)                                                                       \
    cname(const cname &) = delete;                                                                 \
    auto operator=(const cname &)->cname & = delete;

#define DISALLOW_MOVE(cname)                                                                       \
    cname(cname &&) = delete;                                                                      \
    auto operator=(cname &&)->cname & = delete;

// Each helper already ends in ';', so no extra separators are added here
// (the former ones expanded to stray empty declarations).
#define DISALLOW_COPY_AND_MOVE(cname)                                                              \
    DISALLOW_COPY(cname)                                                                           \
    DISALLOW_MOVE(cname)

// Debug-only check: asserts that `expr` holds; `message` is only there so it
// shows up in the assertion text. Compiled out when NDEBUG is defined.
#define LOFT_ASSERT(expr, message) assert((expr) && (message))

// Fatal check that is active in every build type: when `expr` evaluates to
// TRUE, `message` is printed to std::cerr and the process is terminated.
// Wrapped in do { } while (0) so the macro is a single statement and can be
// used in an unbraced if/else; call sites must end it with ';'.
#define LOFT_VERIFY(expr, message)                                                                 \
    do {                                                                                           \
        if (unlikely(expr)) {                                                                      \
            std::cerr << "ERROR: " << (message) << std::endl;                                      \
            std::terminate();                                                                      \
        }                                                                                          \
    } while (0)
Common Footer +******************************************************************************/ +#define binlog_checksum_options BINLOG_CHECKSUM_ALG_CRC32 + +/****************************************************************************** + Format-Description-Event +******************************************************************************/ +#define ST_SERVER_VER_LEN 50 + +#define MAX_SIZE_LOG_EVENT_STATUS \ + (1U + 4 /* type, flags2 */ + 1U + 8 /* type, sql_mode */ + 1U + 1 + \ + 255 /* type, length, catalog */ + 1U + 4 /* type, auto_increment */ + 1U + \ + 6 /* type, charset */ + 1U + 1 + MAX_TIME_ZONE_NAME_LENGTH /* type, length, time_zone */ + \ + 1U + 2 /* type, lc_time_names_number */ + 1U + 2 /* type, charset_database_number */ + 1U + \ + 8 /* type, table_map_for_update */ + 1U + 1 + 32 * 3 /* type, user_len, user */ + 1 + \ + 255 /* host_len, host */ \ + + 1U + 1 + (MAX_DBS_IN_EVENT_MTS * (1 + NAME_LEN)) /* type, db_1, db_2, ... */ \ + + 1U + 3 /* type, microseconds */ + 1U + 1 /* type, explicit_def..ts*/ + 1U + \ + 8 /* type, xid of DDL */ + 1U + 2 /* type, default_collation_for_utf8mb4_number */ + 1U + \ + 1 /* sql_require_primary_key */ + 1U + 1 /* type, default_table_encryption */) + +/** + Maximum length of time zone name that we support (Time zone name is + char(64) in db). mysqlbinlog needs it. +*/ +#define MAX_TIME_ZONE_NAME_LENGTH (NAME_LEN + 1) + +/** + When the actual number of databases exceeds MAX_DBS_IN_EVENT_MTS + the value of OVER_MAX_DBS_IN_EVENT_MTS is is put into the + mts_accessed_dbs status. 
+*/ +#define OVER_MAX_DBS_IN_EVENT_MTS 254 + +/****************************************************************************** + Query-log-event +******************************************************************************/ + +#define MAX_DBS_IN_EVENT_MTS 16 // 最大的可以更改的 dbs 数量 +const uint64 INVALID_XID = 0xffffffffffffffffULL; // 最大事务号 + +/****************************************************************************** + Magic number +******************************************************************************/ +#define BINLOG_MAGIC "\xfe\x62\x69\x6e" // binlog文件起始 4 个 byte 是 magic number +#define BINLOG_MAGIC_SIZE 4 +#define BIN_LOG_HEADER_SIZE 4U +#define BINLOG_CHECKSUM_LEN 4 +#define BINLOG_CHECKSUM_ALG_DESC_LEN 1 /* 1 byte checksum alg descriptor */ + +/****************************************************************************** + Event Common Header +******************************************************************************/ + +/** start event post-header (for v3 and v4) */ +#define ST_BINLOG_VER_OFFSET 0 +#define ST_SERVER_VER_OFFSET 2 +#define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) +#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4) + +#define EVENT_TYPE_OFFSET 4 +#define SERVER_ID_OFFSET 5 +#define EVENT_LEN_OFFSET 9 +#define LOG_POS_OFFSET 13 +#define FLAGS_OFFSET 17 +#define LOG_EVENT_HEADER_LEN 19U /* the fixed header length */ + +/****************************************************************************** + File General constants | refer from include/my_io.h +******************************************************************************/ + +#define FN_LEN 256 /* Max file name len */ +#define FN_HEADLEN 253 /* Max length of filepart of file name */ +#define FN_REFLEN 512 /* Max length of full path-name */ + +#define SYSTEM_CHARSET_MBMAXLEN 3 +#define NAME_CHAR_LEN 64 /* Field/table name length */ +#define NAME_LEN (NAME_CHAR_LEN * SYSTEM_CHARSET_MBMAXLEN) + 
+/****************************************************************************** + decimal.cpp +******************************************************************************/ + +#define DIG_PER_DEC1 9 +#define E_DEC_OK 0 +#define E_DEC_TRUNCATED 1 +#define E_DEC_OVERFLOW 2 +#define E_DEC_DIV_ZERO 4 +#define E_DEC_BAD_NUM 8 +#define E_DEC_OOM 16 +#define _MY_NMR 04 /* Numeral (digit) */ +#define _MY_SPC 010 /* Spacing character */ +#define ROUND_UP(X) (((X) + DIG_PER_DEC1 - 1) / DIG_PER_DEC1) +#define sanity(d) assert((d)->len > 0) +#define my_isspace(s, c) (((s)->ctype + 1)[(uchar)(c)] & _MY_SPC) +#define my_isdigit(s, c) (((s)->ctype + 1)[(uchar)(c)] & _MY_NMR) +#define MY_ERRNO_ERANGE 34 +#define MY_ERRNO_EDOM 33 + +#define EXTRA_ROW_INFO_LEN_OFFSET 0 +#define EXTRA_ROW_INFO_FORMAT_OFFSET 1 +#define EXTRA_ROW_INFO_HEADER_LENGTH 2 +#define EXTRA_ROW_INFO_MAX_PAYLOAD (255 - EXTRA_ROW_INFO_HEADER_LENGTH) +#define ROWS_MAPID_OFFSET 0 +#define ROWS_FLAGS_OFFSET 6 +#define ROWS_VHLEN_OFFSET 8 +#define EXTRA_ROW_INFO_TYPECODE_LENGTH 1 +#define EXTRA_ROW_PART_INFO_VALUE_LENGTH 2 diff --git a/binlogconvert/include/common/queue.h b/binlogconvert/include/common/queue.h new file mode 100644 index 0000000000000000000000000000000000000000..c33fdae19ba1e064ab66f6066e0908ac730ae286 --- /dev/null +++ b/binlogconvert/include/common/queue.h @@ -0,0 +1,48 @@ +#pragma once + +#include "rc.h" + +namespace common { + +/** + * @brief 任务队列 + */ + +/** + * @brief 任务队列接口 + * @ingroup Queue + * @tparam T 任务数据类型。 + */ +template class Queue { +public: + using value_type = T; + +public: + Queue() = default; + virtual ~Queue() = default; + + /** + * @brief 在队列中放一个任务 + * + * @param value 任务数据 + * @return RC 成功返回RC::SUCCESS + */ + virtual RC push(value_type &&value) = 0; + + /** + * @brief 从队列中取出一个任务 + * + * @param value 任务数据 + * @return RC 成功返回RC::SUCCESS。如果队列为空,也不是成功的 + */ + virtual RC pop(value_type &value) = 0; + + /** + * @brief 当前队列中任务的数量 + * + * @return int 对列中任务的数量 + */ + virtual int 
namespace common {

/**
 * @brief Interface for an executable unit of work.
 */
class Runnable {
public:
    Runnable() = default;
    virtual ~Runnable() = default;

    /// Execute the work represented by this object.
    virtual void run() = 0;
};

/**
 * @brief Adapter that wraps a callable so that lambdas can be used wherever a
 *        Runnable is expected.
 * @ingroup ThreadPool
 */
class RunnableAdaptor : public Runnable {
public:
    /**
     * @param callable Work to run; it is taken by value and its target is
     *                 transferred into this object.
     */
    RunnableAdaptor(std::function<void()> callable)
    {
        // Steal the parameter's target instead of copying it a second time:
        // copying a std::function copies the wrapped callable and everything
        // it captured. swap() is used so no additional header is required.
        callable_.swap(callable);
    }

    /// Invoke the wrapped callable.
    void run() override { callable_(); }

private:
    std::function<void()> callable_;
};

} // namespace common
/**
 * @brief Bounded, thread-safe FIFO backed by a fixed-size ring buffer.
 *
 * write() blocks while the queue is full and read() blocks while it is empty.
 *
 * @tparam T Element type. It must be default-constructible (the ring buffer is
 *           pre-filled with `capacity` elements) and move-assignable.
 */
template <typename T> class TaskQueue {
public:
    /**
     * @param capacity Maximum number of queued elements. Must be greater than
     *                 zero: with a capacity of 0, write() can never proceed.
     */
    explicit TaskQueue(size_t capacity)
        : capacity_(capacity)
        , head_(0)
        , tail_(0)
        , size_(0)
        , buffer_(capacity)
    {}

    /**
     * @brief Append a task to the queue, waiting for a free slot if necessary.
     * @param task Task to enqueue; it is moved into the ring buffer.
     * @return Always true.
     */
    bool write(T &&task)
    {
        std::unique_lock<std::mutex> lock(mutex_);
        cond_not_full_.wait(lock, [this] { return size_ < capacity_; }); // wait for a free slot

        buffer_[tail_] = std::move(task);
        tail_ = (tail_ + 1) % capacity_;
        ++size_;

        cond_not_empty_.notify_one(); // wake one reader: a task is available
        return true;
    }

    /**
     * @brief Remove the oldest task from the queue, waiting until one exists.
     * @param task Receives the dequeued task.
     * @return Always true.
     */
    bool read(T &task)
    {
        std::unique_lock<std::mutex> lock(mutex_);
        cond_not_empty_.wait(lock, [this] { return size_ > 0; }); // wait for a task

        // Move the element out of its slot. Copying would duplicate the
        // payload, keep a stale copy (and its resources) alive inside the ring
        // until the slot is overwritten, and reject move-only element types.
        task = std::move(buffer_[head_]);
        head_ = (head_ + 1) % capacity_;
        --size_;

        cond_not_full_.notify_one(); // wake one writer: a slot is free
        return true;
    }

    /**
     * @brief Number of tasks currently queued.
     *
     * Despite the name, this never waits for the queue to become non-empty;
     * the only blocking involved is acquiring the internal mutex.
     *
     * @return Snapshot of the element count.
     */
    size_t get_task_count_blocking()
    {
        std::unique_lock<std::mutex> lock(mutex_);
        return size_;
    }

private:
    size_t capacity_;       // capacity of the ring buffer
    size_t head_;           // index of the oldest element (next to be read)
    size_t tail_;           // index of the next free slot (next to be written)
    size_t size_;           // number of elements currently stored
    std::vector<T> buffer_; // ring-buffer storage

    std::mutex mutex_;
    std::condition_variable cond_not_empty_;
    std::condition_variable cond_not_full_;
};
线程池有一个任务队列,收到的任务会放到任务队列中。当任务队列中任务的个数比当前线程个数多时,就会 + * 创建新的线程。 + * + * TODO 任务execute接口,增加一个future返回值,可以获取任务的执行结果 + */ +class ThreadPoolExecutor { +public: + ThreadPoolExecutor() = default; + virtual ~ThreadPoolExecutor(); + + /** + * @brief 初始化线程池 + * + * @param name 线程池名称 + * @param core_size 核心线程个数。核心线程不会退出 + * @param max_size 线程池最大线程个数 + * @param keep_alive_time_ms 非核心线程空闲多久后退出 + */ + RC init(const char *name, int core_size, int max_size, long keep_alive_time_ms); + + /** + * @brief 初始化线程池 + * + * @param name 线程池名称 + * @param core_size 核心线程个数。核心线程不会退出 + * @param max_size 线程池最大线程个数 + * @param keep_alive_time_ms 非核心线程空闲多久后退出 + * @param work_queue 任务队列 + */ + RC init(const char *name, int core_pool_size, int max_pool_size, long keep_alive_time_ms, + std::unique_ptr>> &&work_queue); + + /** + * @brief 提交一个任务,不一定可以立即执行 + * + * @param task 任务 + * @return int 成功放入队列返回0 + */ + RC execute(std::unique_ptr &&task); + + /** + * @brief 提交一个任务,不一定可以立即执行 + * + * @param callable 任务 + * @return int 成功放入队列返回0 + */ + RC execute(const std::function &callable); + + /** + * @brief 关闭线程池 + */ + RC shutdown(); + /** + * @brief 等待线程池处理完所有任务并退出 + */ + RC await_termination(); + +public: + /** + * @brief 当前活跃线程的个数,就是正在处理任务的线程个数 + */ + int active_count() const { return active_count_.load(); } + /** + * @brief 核心线程个数 + */ + int core_pool_size() const { return core_pool_size_; } + /** + * @brief 线程池中线程个数 + */ + int pool_size() const { return static_cast(threads_.size()); } + /** + * @brief 曾经达到过的最大线程个数 + */ + int largest_pool_size() const { return largest_pool_size_; } + /** + * @brief 处理过的任务个数 + */ + int64 task_count() const { return task_count_.load(); } + + /** + * @brief 任务队列中的任务个数 + */ + int64 queue_size() const { return static_cast(work_queue_->size()); } + + /** + * @brief 打印当前线程池状态 + */ + void log_status() const; + +private: + /** + * @brief 创建一个线程 + * + * @param core_thread 是否是核心线程 + */ + RC create_thread(bool core_thread); + /** + * @brief 创建一个线程。调用此函数前已经加锁 + * + * @param core_thread 
是否是核心线程 + */ + RC create_thread_locked(bool core_thread); + /** + * @brief 检测是否需要扩展线程,如果需要就扩展 + */ + RC extend_thread(); + +private: + /** + * @brief 线程函数。从队列中拉任务并执行 + */ + void thread_func(); + +private: + /** + * @brief 线程池的状态 + */ + enum class State { + NEW, //! 新建状态 + RUNNING, //! 正在运行 + TERMINATING, //! 正在停止 + TERMINATED //! 已经停止 + }; + + struct ThreadData { + bool core_thread = false; /// 是否是核心线程 + bool idle = false; /// 是否空闲 + bool terminated = false; /// 是否已经退出 + std::thread *thread_ptr = nullptr; /// 线程指针 + }; + +private: + State state_ = State::NEW; /// 线程池状态 + + int core_pool_size_ = 0; /// 核心线程个数 + int max_pool_size_ = 0; /// 最大线程个数 + std::chrono::milliseconds keep_alive_time_ms_; /// 非核心线程空闲多久后退出 + + std::unique_ptr>> work_queue_; /// 任务队列 + + mutable std::mutex lock_; /// 保护线程池内部数据的锁 + std::map threads_; /// 线程列表 + + int largest_pool_size_ = 0; /// 历史上达到的最大的线程个数 + std::atomic task_count_{0}; /// 处理过的任务个数 + std::atomic active_count_{0}; /// 活跃线程个数 + const char *pool_name_; /// 线程池名称 +}; + +} // namespace common diff --git a/binlogconvert/include/common/thread_util.h b/binlogconvert/include/common/thread_util.h new file mode 100644 index 0000000000000000000000000000000000000000..feb4b045130590a01c9d830fb804c551dea2c3f9 --- /dev/null +++ b/binlogconvert/include/common/thread_util.h @@ -0,0 +1,15 @@ +#pragma once + +namespace common { + +/** + * @brief 设置当前线程的名字 + * @details 设置当前线程的名字可以帮助调试多线程程序,比如在gdb或者 top + * -H命令可以看到线程名字。 + * pthread_setname_np在Linux和Mac上实现不同。Linux上可以指定线程号设置名称,但是Mac上不行。 + * @param name 线程的名字。按照linux手册中描述,包括\0在内,不要超过16个字符 + * @return int 设置成功返回0 + */ +int thread_set_name(const char *name); + +} // namespace common diff --git a/binlogconvert/include/common/type_def.h b/binlogconvert/include/common/type_def.h new file mode 100644 index 0000000000000000000000000000000000000000..14473b61ba25397257de75c6a77c8a27315e6419 --- /dev/null +++ b/binlogconvert/include/common/type_def.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +using uint8 = 
std::uint8_t; +using uint16 = std::uint16_t; +using int16 = std::int16_t; + +using int32 = std::int32_t; +using uint32 = std::uint32_t; + +using int64 = std::int64_t; +using uint64 = std::uint64_t; + +typedef unsigned int uint; +typedef unsigned long ulong; +typedef long long longlong; +typedef unsigned long long ulonglong; + +using my_off_t = std::uint64_t; +using uchar = unsigned char; + +// used for decimal +using dec1 = std::int32_t; +using udec1 = std::uint32_t; + +#define INT_MIN16 (~0x7FFF) +#define INT_MAX16 0x7FFF +#define INT_MIN32 (~0x7FFFFFFFL) +#define INT_MAX32 0x7FFFFFFFL +#define UINT_MAX16 0xFFFF +#define UINT_MAX32 0xFFFFFFFFL + +struct MYSQL_LEX_CSTRING { + const char *str; + std::size_t length; +}; +typedef struct MYSQL_LEX_CSTRING LEX_CSTRING; \ No newline at end of file diff --git a/binlogconvert/include/data_handler.h b/binlogconvert/include/data_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..931774b0d1cc6ed8c33de0fd2d106ce7da579d1a --- /dev/null +++ b/binlogconvert/include/data_handler.h @@ -0,0 +1,214 @@ +#pragma once + +#include "common/rc.h" +#include "events/write_event.h" +#include "format/dml_generated.h" +#include "sql/mysql_fields.h" +#include "utils/base64.h" + +#include // setprecision +#include +#include +#include // ostringstream + +using namespace loft; + +// mysql 存储 year 类型,如果是 19xx 则减去 1900,否则减去 2000 +constexpr int YEAR_BASE_2000 = 2000; +constexpr int YEAR_BASE_1900 = 1900; + +/** + * @brief 通用的数据处理接口 + */ +class FieldDataHandler { +public: + virtual RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) = 0; + virtual ~FieldDataHandler() = default; +}; + +/** + * @brief long / double / string 实现具体的处理器 + */ +class LongValueHandler : public FieldDataHandler { +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + int64 value = data->value_as_LongVal()->value(); + + if (field->type() == MYSQL_TYPE_YEAR) { + if (1000 <= value && value 
<= 9999) { // YEAR(4) + if (1901 <= value && value <= 1970) { + value -= 1900; + } else { + value -= 1970; + } + } else { // YEAR(2) + if (value <= 69) { + value += 2000; + } else { + value += 1900; + } + } + } + row->writeData(reinterpret_cast(&value), field->type(), field->pack_length()); + return RC::SUCCESS; + } +}; + +class DoubleValueHandler : public FieldDataHandler { +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + double value = data->value_as_DoubleVal()->value(); + + if (field->type() == MYSQL_TYPE_FLOAT) { + float float_value = value; + row->writeData(reinterpret_cast(&float_value), field->type(), + field->pack_length()); + } else if (field->type() == MYSQL_TYPE_TIME) { + // 将 double 转换为字符串 + std::ostringstream oss; + oss << std::fixed << std::setprecision(field->decimals()) << value; + std::string time_str = oss.str(); + std::vector time_str_bytes(time_str.begin(), time_str.end()); + + row->writeData(time_str_bytes.data(), field->type(), field->pack_length(), + time_str.size(), field->decimals()); + } else { + double double_value = value; + row->writeData(reinterpret_cast(&double_value), field->type(), + field->pack_length()); + } + return RC::SUCCESS; + } +}; + +class StringValueHandler : public FieldDataHandler { +public: + RC processData(const kvPair *data, mysql::Field *field, Rows_event *row) override + { + const char *str = data->value_as_StringVal()->value()->c_str(); + + if (field->type() == MYSQL_TYPE_NEWDECIMAL) { + + std::string convertedStr; + if (isScientificNotation(str)) { + convertedStr = convertScientificToDecimal(str); + } else { + convertedStr = str; + } + + row->writeData( + const_cast(reinterpret_cast(convertedStr.c_str())), + field->type(), convertedStr.size(), convertedStr.size(), field->pack_length(), + field->decimals()); + + } else if (field->type() == MYSQL_TYPE_DATETIME || field->type() == MYSQL_TYPE_TIMESTAMP2) { + row->writeData(const_cast(reinterpret_cast(str)), 
field->type(), + field->pack_length(), strlen(str), field->decimals()); + } else if (field->type() == MYSQL_TYPE_JSON) { + // base64 解码,后不用按照 3 组再合并解释出来,直接写入 + auto dst = base64_decode(str); + row->writeData(dst.data(), field->type(), field->pack_length(), dst.size()); + } else { + char *dst = (char *)malloc(base64_needed_decoded_length(strlen(str))); + int64_t dst_len = base64_decode(str, strlen(str), (void *)dst, nullptr, 0); + row->writeData(reinterpret_cast(dst), field->type(), field->pack_length(), + dst_len); + // 释放内存 + free(dst); + } + return RC::SUCCESS; + } + +private: + /** + * @brief 判断字符串是否为科学计数法 + * @param str 字符串 + * @return 是否为科学计数法 + */ + bool isScientificNotation(const char *str) + { + bool hasE = false; + bool hasDigits = false; + while (*str) { + if (*str == 'E' || *str == 'e') { + if (hasE) // 如果有多于一个E + return false; + hasE = true; + } else if (isdigit(*str) || *str == '.' || *str == '+' || *str == '-') { + if (isdigit(*str)) + hasDigits = true; + } else { + return false; + } + ++str; + } + return hasE && hasDigits; + } + + /** + * @brief 将科学计数法转换为十进制 + * @param str 科学计数法字符串 + * @return 十进制字符串 + */ + std::string convertScientificToDecimal(const char *str) + { + std::string input(str); + size_t ePos = input.find_first_of("Ee"); + if (ePos == std::string::npos) { + return input; + } + + // base part + std::string base = input.substr(0, ePos); + + // exp part + std::string expStr = input.substr(ePos + 1); + int exp = std::stoi(expStr); + + // 移除基数中的小数点 + size_t dotPos = base.find('.'); + std::string cleanBase = base; + int decimalPlaces = 0; + + if (dotPos != std::string::npos) { + cleanBase.erase(dotPos, 1); + decimalPlaces = base.length() - dotPos - 1; + exp -= decimalPlaces; // adjust exp + } + + // 在末尾添加所需的零 + if (exp >= 0) { + return cleanBase + std::string(exp, '0'); + } else { + // 处理负指数的情况 + std::string result = "0."; + result += std::string(-exp - 1, '0'); + result += cleanBase; + return result; + } + } +}; + +/** + * @brief 创建工厂类管理处理器 + 
*/ +class DataHandlerFactory { +public: + explicit DataHandlerFactory() + { + handlers_[DataMeta_LongVal] = std::make_unique(); + handlers_[DataMeta_DoubleVal] = std::make_unique(); + handlers_[DataMeta_StringVal] = std::make_unique(); + } + + FieldDataHandler *getHandler(loft::DataMeta type) const + { + auto it = handlers_.find(type); + return it != handlers_.end() ? it->second.get() : nullptr; + } + +private: + std::map> handlers_; +}; diff --git a/binlogconvert/include/events/abstract_event.h b/binlogconvert/include/events/abstract_event.h new file mode 100644 index 0000000000000000000000000000000000000000..5f5bc9be88ad0a4f3c4f62db5e2e808eb5a2c2fc --- /dev/null +++ b/binlogconvert/include/events/abstract_event.h @@ -0,0 +1,245 @@ +#pragma once + +#include "common/init_setting.h" +#include "common/macros.h" +#include "common/mysql_constant_def.h" +#include "common/type_def.h" + +#include "basic_ostream.h" + +/** + * @brief 日志事件类型 + */ +enum Log_event_type { + + UNKNOWN_EVENT = 0, + /* + 自 mysql_helper 8.0.2 起已弃用。它只是一个占位符, + 不应该在其他任何地方使用。 + */ + START_EVENT_V3 = 1, + QUERY_EVENT = 2, + STOP_EVENT = 3, + ROTATE_EVENT = 4, + INTVAR_EVENT = 5, + + SLAVE_EVENT = 7, + + APPEND_BLOCK_EVENT = 9, + DELETE_FILE_EVENT = 11, + + RAND_EVENT = 13, + USER_VAR_EVENT = 14, + FORMAT_DESCRIPTION_EVENT = 15, + XID_EVENT = 16, + BEGIN_LOAD_QUERY_EVENT = 17, + EXECUTE_LOAD_QUERY_EVENT = 18, + + TABLE_MAP_EVENT = 19, + + /** + V1 行事件编号从 5.1.16 到 mysql_helper-5.6 使用。 + */ + WRITE_ROWS_EVENT_V1 = 23, + UPDATE_ROWS_EVENT_V1 = 24, + DELETE_ROWS_EVENT_V1 = 25, + + /** + 主服务器上发生了异常情况 + */ + INCIDENT_EVENT = 26, + + /** + 主服务器在空闲时发送的心跳事件, + 以确保从服务器知道主服务器的在线状态 + */ + HEARTBEAT_LOG_EVENT = 27, + + /** + 在某些情况下,有必要向从服务器发送可忽略的数据: + 从服务器可以处理的数据,如果有代码处理它, + 但如果不被识别,可以忽略。 + */ + IGNORABLE_LOG_EVENT = 28, + ROWS_QUERY_LOG_EVENT = 29, + + /** Version 2 of the Row events */ + WRITE_ROWS_EVENT = 30, + UPDATE_ROWS_EVENT = 31, + DELETE_ROWS_EVENT = 32, + + GTID_LOG_EVENT = 33, + ANONYMOUS_GTID_LOG_EVENT = 
34, + + PREVIOUS_GTIDS_LOG_EVENT = 35, + + TRANSACTION_CONTEXT_EVENT = 36, + + VIEW_CHANGE_EVENT = 37, + + /* Prepared XA transaction terminal event similar to Xid */ + XA_PREPARE_LOG_EVENT = 38, + + /** + UPDATE_ROWS_EVENT 的扩展,允许根据 binlog_row_value_options + 设置部分值。 + */ + PARTIAL_UPDATE_ROWS_EVENT = 39, + + TRANSACTION_PAYLOAD_EVENT = 40, + + HEARTBEAT_LOG_EVENT_V2 = 41, + /** + 在此处添加新事件 - 就在此注释上方! + 现有事件(除 ENUM_END_EVENT 外)不应更改其编号 + */ + ENUM_END_EVENT /* end marker */ +}; + +enum enum_binlog_checksum_alg { + BINLOG_CHECKSUM_ALG_OFF = 0, + BINLOG_CHECKSUM_ALG_CRC32 = 1, + BINLOG_CHECKSUM_ALG_ENUM_END, + BINLOG_CHECKSUM_ALG_UNDEF = 255 +}; + +class AbstractEvent; + +/** + * @brief 日志事件的通用头部 + */ +class EventCommonHeader { +public: + EventCommonHeader(time_t i_ts_arg, Log_event_type type_code_arg = ENUM_END_EVENT) + : type_code_(type_code_arg) + , timestamp_(i_ts_arg) + , data_written_(0) + , log_pos_(0) + , flags_(0) + {} + ~EventCommonHeader() = default; + +public: + time_t timestamp_; // 这里用 timestamp 代替 timeval + Log_event_type type_code_; // 事件类型 + uint32 unmasked_server_id_; // 服务器id + // 在构造函数里暂时不用知道,直到 write-common-header 时外界会计算出 + // event_data_len + size_t data_written_{0}; // 写入的字节数 + uint64 log_pos_{0}; // 日志位置 + uint16 flags_{0}; // 标志位表示是否正常关闭,默认关闭是 0 +}; + +class AbstractEvent { +public: + static const int LOG_EVENT_TYPES = (ENUM_END_EVENT - 1); + + // 每个 event 的 post-header 长度 + enum enum_post_header_length { + // where 3.23, 4.x and 5.0 agree + QUERY_HEADER_MINIMAL_LEN = (4 + 4 + 1 + 2), + // where 5.0 differs: 2 for length of N-bytes vars. 
+ QUERY_HEADER_LEN = (QUERY_HEADER_MINIMAL_LEN + 2), + STOP_HEADER_LEN = 0, + START_V3_HEADER_LEN = (2 + ST_SERVER_VER_LEN + 4), + // this is FROZEN (the Rotate post-header is frozen) + ROTATE_HEADER_LEN = 8, + INTVAR_HEADER_LEN = 0, + APPEND_BLOCK_HEADER_LEN = 4, + DELETE_FILE_HEADER_LEN = 4, + RAND_HEADER_LEN = 0, + USER_VAR_HEADER_LEN = 0, + FORMAT_DESCRIPTION_HEADER_LEN = (START_V3_HEADER_LEN + 1 + LOG_EVENT_TYPES), + XID_HEADER_LEN = 0, + BEGIN_LOAD_QUERY_HEADER_LEN = APPEND_BLOCK_HEADER_LEN, + ROWS_HEADER_LEN_V1 = 8, + TABLE_MAP_HEADER_LEN = 8, + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN = (4 + 4 + 4 + 1), + EXECUTE_LOAD_QUERY_HEADER_LEN = (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN), + INCIDENT_HEADER_LEN = 2, + HEARTBEAT_HEADER_LEN = 0, + IGNORABLE_HEADER_LEN = 0, + ROWS_HEADER_LEN_V2 = 10, + TRANSACTION_CONTEXT_HEADER_LEN = 18, + VIEW_CHANGE_HEADER_LEN = 52, + XA_PREPARE_HEADER_LEN = 0, + TRANSACTION_PAYLOAD_HEADER_LEN = 0, + }; // end enum_post_header_length + + explicit AbstractEvent(Log_event_type type_code) { type_code_ = type_code; } + virtual ~AbstractEvent() = default; + + DISALLOW_COPY(AbstractEvent); + AbstractEvent(AbstractEvent &&) = default; + AbstractEvent &operator=(AbstractEvent &&) = default; + + enum Log_event_type get_type_code() { return type_code_; } + + /** + * @brief 1. 对于复杂的 event 类型,event_data_size 写入 + * common-header时,会在具体的 write() 里同步计算后写入 + * 2. 
对于简单的 event,可以直接调用 + * event_data_size(),是一个确定的值 + * @return + */ + virtual size_t get_data_size() { return 0; } + + /** + * @brief 有 Gtid_log_event,Table_map_log_event,Rows_log_event 会实现 + * @return + */ + virtual bool write_data_header(Basic_ostream *) { return true; } + + /** + * @brief 有 + * Gtid_log_event,Previous_gtids_log_event,Table_map_log_event,Rows_log_event + * 会实现 + * @return + */ + virtual bool write_data_body(Basic_ostream *) { return true; } + + bool write_common_header(Basic_ostream *ostream, size_t event_data_length); + + /** + * @brief 直接写 event 到文件流中 + * @param ostream + */ + virtual bool write(Basic_ostream *ostream) + { + return write_common_header(ostream, get_data_size()) && write_data_header(ostream) && + write_data_body(ostream); + } + + /** + * @brief 改造 write 的逻辑:写入到 buffer 中,返回写入的字节数 + * @param buffer + */ + virtual size_t write_to_buffer(uchar *buffer) + { + size_t pos = 0; + // 1. 写通用头部 + pos += write_common_header_to_buffer(buffer); + // 2. 写数据头部 + pos += write_data_header_to_buffer(buffer + pos); + // 3. 
写真实数据 + pos += write_data_body_to_buffer(buffer + pos); + return pos; + } + +protected: + static const uint32 POSITION_PLACEHOLDER = 0; // common-header 的 log_pos_ 占位符 + virtual size_t write_common_header_to_buffer(uchar *buffer); + virtual size_t write_data_header_to_buffer(uchar *buffer) { return 0; } + virtual size_t write_data_body_to_buffer(uchar *buffer) { return 0; } + + time_t get_common_header_time(); + +private: + uint32 write_common_header_to_memory(uchar *buf); + +public: + std::unique_ptr common_header_; + + enum Log_event_type type_code_ = UNKNOWN_EVENT; + bool query_start_usec_used_ = true; +}; diff --git a/binlogconvert/include/events/control_events.h b/binlogconvert/include/events/control_events.h new file mode 100644 index 0000000000000000000000000000000000000000..e5e9b757d57b21381daf9dc087ced1c277e39874 --- /dev/null +++ b/binlogconvert/include/events/control_events.h @@ -0,0 +1,278 @@ +#pragma once + +#include // gettimeofday() + +#include +#include + +#include "events/abstract_event.h" +#include "utils/rpl_gtid.h" + +/* + +###### ##### ###### +# # # # +##### # # ##### +# # # # +# # # # +# ##### ###### + +*/ +class Format_description_event : public AbstractEvent { +public: + Format_description_event(uint8 binlog_ver, const char *server_ver); + ~Format_description_event() override; + + DISALLOW_COPY(Format_description_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override { return AbstractEvent::FORMAT_DESCRIPTION_HEADER_LEN; } + bool write(Basic_ostream *ostream) override; + +private: + time_t get_fde_create_time(); + +public: + uint16 binlog_version_; + /* 每个版本的固定值,不可修改,否则在 replication 时会出错, 目前暂时为 empty + */ + char server_version_[ST_SERVER_VER_LEN]{}; + time_t create_timestamp_; + uint8 common_header_len_; // 固定为 19U + std::vector post_header_len_; + + uint8 number_of_event_types; +}; + +/* + + + #### ##### # ##### ###### +# # # # # # # +# # # # # ##### +# ### # # # # # +# # # # # # # + #### # # 
##### ######
+ #######
+ ~
+*/
+
+/** Plain (sidno, gno) pair stored inside Gtid_event. */
+struct gtid_info {
+    int32_t rpl_gtid_sidno;
+    int64_t rpl_gtid_gno;
+};
+
+/**
+ * @brief GTID event declaration.
+ *
+ * Holds the commit metadata of one transaction (logical timestamps, commit
+ * timestamps, server versions, SID/GNO) together with the length constants
+ * used to lay out its post-header and body. The constructor, destructor,
+ * get_data_size() and the write_* functions are defined out of line.
+ */
+class Gtid_event : public AbstractEvent {
+public:
+    Gtid_event(int64 last_committed_arg, int64 sequence_number_arg, bool may_have_sbr_stmts_arg,
+               uint64 original_commit_timestamp_arg, uint64 immediate_commit_timestamp_arg,
+               uint32 original_server_version_arg, uint32 immediate_server_version_arg);
+
+    ~Gtid_event() override;
+    DISALLOW_COPY(Gtid_event);
+
+    // ********* impl virtual function *********************
+    size_t get_data_size() override;
+    size_t write_data_header_to_buffer(uchar *buffer) override;
+    size_t write_data_body_to_buffer(uchar *buffer) override;
+
+    /** @return the GNO stored in gtid_info_struct. */
+    int64 get_gno() const { return gtid_info_struct.rpl_gtid_gno; }
+
+    /* We have only original commit timestamp if both timestamps are equal. */
+    int get_commit_timestamp_length() const
+    {
+        if (original_commit_timestamp_ != immediate_commit_timestamp_) {
+            return FULL_COMMIT_TIMESTAMP_LENGTH;
+        }
+        return ORIGINAL_COMMIT_TIMESTAMP_LENGTH;
+    }
+
+    /**
+      We only store the immediate_server_version if both server versions are the
+      same.
+    */
+    int get_server_version_length() const
+    {
+        if (original_server_version_ != immediate_server_version_) {
+            return FULL_SERVER_VERSION_LENGTH;
+        }
+        return IMMEDIATE_SERVER_VERSION_LENGTH;
+    }
+
+private:
+    /**
+     * @brief Write the data header (post-header) of the gtid event.
+     * @param buffer start address of the buffer to write into
+     * @return the fixed length Gtid_log_event::POST_HEADER_LENGTH.
+     */
+    uint32 write_post_header_to_memory(uchar *buffer);
+
+    /**
+     * @brief Write the data body of the gtid event.
+     * @param buffer buffer to write into
+     * @return number of data-body bytes written
+     */
+    uint32 write_body_to_memory(uchar *buffer);
+
+public:
+    static const int ENCODED_FLAG_LENGTH = 1;
+    static const int ENCODED_SID_LENGTH = 16; // Uuid::BYTE_LENGTH;
+    static const int ENCODED_GNO_LENGTH = 8;
+    /// Length of typecode for logical timestamps.
+    static const int LOGICAL_TIMESTAMP_TYPECODE_LENGTH = 1;
+    /// Length of two logical timestamps.
+    static const int LOGICAL_TIMESTAMP_LENGTH = 16;
+    // Type code used before the logical timestamps.
+    static const int LOGICAL_TIMESTAMP_TYPECODE = 2;
+
+    static const int IMMEDIATE_COMMIT_TIMESTAMP_LENGTH = 7;
+    static const int ORIGINAL_COMMIT_TIMESTAMP_LENGTH = 7;
+    // Length of two timestamps (from original/immediate masters)
+    static const int FULL_COMMIT_TIMESTAMP_LENGTH =
+        IMMEDIATE_COMMIT_TIMESTAMP_LENGTH + ORIGINAL_COMMIT_TIMESTAMP_LENGTH;
+    // We use 7 bytes out of which 1 bit is used as a flag.
+    // NOTE(review): 55 = 7 * 8 - 1, so despite the name this looks like a bit count — confirm.
+    static const int ENCODED_COMMIT_TIMESTAMP_LENGTH = 55;
+    // Minimum and maximum lengths of transaction length field.
+    static const int TRANSACTION_LENGTH_MIN_LENGTH = 1;
+    static const int TRANSACTION_LENGTH_MAX_LENGTH = 9;
+    /// Length of original_server_version
+    static const int ORIGINAL_SERVER_VERSION_LENGTH = 4;
+    /// Length of immediate_server_version
+    static const int IMMEDIATE_SERVER_VERSION_LENGTH = 4;
+    /// Length of original and immediate server version
+    static const int FULL_SERVER_VERSION_LENGTH =
+        ORIGINAL_SERVER_VERSION_LENGTH + IMMEDIATE_SERVER_VERSION_LENGTH;
+    // We use 4 bytes out of which 1 bit is used as a flag.
+    // NOTE(review): 31 = 4 * 8 - 1, presumably a bit count as well — confirm.
+    static const int ENCODED_SERVER_VERSION_LENGTH = 31;
+
+    /*
+      The first bit tells whether sync is enabled,
+      the remaining 63 bits hold the ticket value.
+    */
+    static constexpr int COMMIT_GROUP_TICKET_LENGTH = 8;
+
+    static constexpr std::uint64_t kGroupTicketUnset = 0;
+
+    gtid_info gtid_info_struct{};
+
+    /* Minimum GNO expected in a serialized GTID event */
+    static const int64 MIN_GNO = 1;
+    /// One-past-the-max value of GNO
+    static const int64 GNO_END = INT64_MAX;
+
+    /// Total length of post header
+    static const int POST_HEADER_LENGTH =
+        ENCODED_FLAG_LENGTH + /* flags */
+        ENCODED_SID_LENGTH + /* SID length */
+        ENCODED_GNO_LENGTH + /* GNO length */
+        LOGICAL_TIMESTAMP_TYPECODE_LENGTH + /* length of typecode */
+        LOGICAL_TIMESTAMP_LENGTH; /* length of two logical timestamps */
+
+    /*
+      We keep the commit timestamps in the body section because they can be of
+      variable length.
+      On the originating master, the event has only one timestamp as the two
+      timestamps are equal. On every other server we have two timestamps.
+    */
+    static const int MAX_DATA_LENGTH = FULL_COMMIT_TIMESTAMP_LENGTH +
+                                       TRANSACTION_LENGTH_MAX_LENGTH + FULL_SERVER_VERSION_LENGTH +
+                                       COMMIT_GROUP_TICKET_LENGTH; /* 64-bit unsigned integer */
+
+    static const int MAX_EVENT_LENGTH = LOG_EVENT_HEADER_LEN + POST_HEADER_LENGTH + MAX_DATA_LENGTH;
+
+public:
+    long long int last_committed_;
+    long long int sequence_number_;
+    /** GTID flags constants */
+    // NOTE(review): this is a per-object const member, not a static constant
+    // like the length constants above; it occupies space in every instance.
+    const unsigned char FLAG_MAY_HAVE_SBR = 1;
+    /** Transaction might have changes logged with SBR */
+    bool may_have_sbr_stmts_;
+    /** Timestamp when the transaction was committed on the originating master.
+     */
+    unsigned long long int original_commit_timestamp_;
+    /** Timestamp when the transaction was committed on the nearest master. */
+    unsigned long long int immediate_commit_timestamp_;
+    bool has_commit_timestamps{};
+    /** The length of the transaction in bytes. */
+    unsigned long long int transaction_length_;
+
+    Gtid_specification spec_;
+    /// SID for this GTID.
+    rpl_sid sid_;
+
+    /** The version of the server where the transaction was originally executed
+     */
+    uint32_t original_server_version_;
+    /** The version of the immediate server */
+    uint32_t immediate_server_version_;
+};
+
+/*
+ *
+# # # ##### ######
+ # # # # # #
+ ## # # # #####
+ ## # # # #
+ # # # # # #
+# # # ##### ######
+ #######
+ ~ ~
+ */
+/**
+ * @brief XID event declaration; its data size is sizeof(xid_).
+ */
+class Xid_event : public AbstractEvent {
+public:
+    Xid_event(uint64 xid_arg, uint64 original_commit_timestamp_arg);
+    ~Xid_event() override = default;
+    DISALLOW_COPY(Xid_event);
+
+    // ********* impl virtual function *********************
+    /** @return sizeof(xid_). */
+    size_t get_data_size() override { return sizeof(xid_); }
+
+    size_t write_data_header_to_buffer(uchar *buffer) override;
+    size_t write_data_body_to_buffer(uchar *buffer) override;
+
+private:
+    uint64 xid_;
+};
+
+/**
+
+##### #### ##### ## ##### ###### ######
+# # # # # # # # # #
+# # # # # # # # ##### #####
+##### # # # ###### # # #
+# # # # # # # # # #
+# # #### # # # # ###### ######
+ #######
+
+ */
+/**
+ * @brief Rotate event declaration: carries the name of the next binlog file,
+ *        its length, a flag word and a position.
+ *
+ * Unlike the other events declared in this header it overrides the
+ * stream-based write_data_header()/write_data_body() hooks.
+ */
+class Rotate_event : public AbstractEvent {
+public:
+    Rotate_event(const std::string &new_log_ident_arg, size_t ident_len_arg, uint32 flags_arg,
+                 uint64 pos_arg);
+    ~Rotate_event() override = default; // std::string manages the file_name memory automatically
+    DISALLOW_COPY(Rotate_event);
+
+    // ********* impl virtual function *********************
+    /** @return ident_len_ + ROTATE_HEADER_LEN. */
+    size_t get_data_size() override { return ident_len_ + ROTATE_HEADER_LEN; }
+
+    bool write_data_header(Basic_ostream *stream) override;
+    bool write_data_body(Basic_ostream *stream) override;
+
+public:
+    const std::string new_log_ident_; // next binlog file_name
+    size_t ident_len_;                // next file_name length
+    uint32 flags_;
+    uint64 pos_;
+
+    enum {
+        /* Values taken by the flag member variable */
+        DUP_NAME = 2, // if constructor should dup the string argument
+        RELAY_LOG = 4 // rotate event for the relay log
+    };
+
+    enum {
+        /* Rotate event post_header */
+        R_POS_OFFSET = 0,
+        R_IDENT_OFFSET = 8
+    };
+};
diff --git
a/binlogconvert/include/events/rows_event.h b/binlogconvert/include/events/rows_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..7287a87d492134be3a57889eff00ae6f153b6dea
--- /dev/null
+++ b/binlogconvert/include/events/rows_event.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include
+#include
+
+#include "events/abstract_event.h"
+#include "sql/mysql_fields.h"
+#include "utils/table_id.h"
+
+/**
+ * @brief Table map event declaration.
+ *
+ * Stores the table id, database/table names with their lengths, the column
+ * count and per-column buffers (types, field metadata, null bits). The
+ * constructor, destructor, write_* functions and save_field_metadata() are
+ * defined out of line; get_data_size() returns the stored m_data_size_.
+ */
+class Table_map_event : public AbstractEvent {
+public:
+    Table_map_event(const Table_id &tid, uint64 colcnt, const char *dbnam, size_t dblen,
+                    const char *tblnam, size_t tbllen,
+                    const std::vector &column_view,
+                    uint64 original_commit_timestamp_arg);
+    ~Table_map_event() override;
+    DISALLOW_COPY(Table_map_event);
+
+    // ********* impl virtual function *********************
+    /** @return the stored m_data_size_. */
+    size_t get_data_size() override { return m_data_size_; }
+
+    size_t write_data_header_to_buffer(uchar *buffer) override;
+    size_t write_data_body_to_buffer(uchar *buffer) override;
+
+    int save_field_metadata();
+
+    /** Constants representing offsets */
+    enum Table_map_event_offset {
+        /** TM = "Table Map" */
+        TM_MAPID_OFFSET = 0,
+        TM_FLAGS_OFFSET = 6
+    };
+
+    /** Event post header contents */
+    Table_id m_table_id_;
+    typedef uint16 flag_set;
+    flag_set m_flags = 0; // defaults to 0 in the current 8.0 version
+
+    size_t m_data_size_; /** event data size */
+
+    /** Event body contents */
+    std::string m_dbnam_;
+    unsigned long long int m_dblen_;
+    std::string m_tblnam_;
+    unsigned long long int m_tbllen_;
+    unsigned long m_colcnt_;
+    std::unique_ptr m_coltype_;
+    /**
+      The size of field metadata buffer set by calling save_field_metadata()
+    */
+    unsigned long m_field_metadata_size_;
+    std::unique_ptr m_field_metadata_;
+    std::unique_ptr m_null_bits_;
+
+    // ********* log event field *********************
+    std::vector m_column_view_; // Table field set
+};
diff --git a/binlogconvert/include/events/statement_events.h b/binlogconvert/include/events/statement_events.h
new file mode 100644
index
0000000000000000000000000000000000000000..1dc56c0ea7f236504dfde0e5c5956e63955cccfa
--- /dev/null
+++ b/binlogconvert/include/events/statement_events.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include "common/init_setting.h"
+#include "common/macros.h"
+#include "events/abstract_event.h"
+#include
+
+/**
+ * @brief Query event declaration.
+ *
+ * Declares the post-header offsets, the status-variable codes and the fields
+ * written for a query. The constructor, the write_* functions and
+ * calculate_status_vars_len() are defined out of line.
+ */
+class Query_event : public AbstractEvent {
+public:
+    /** query event post-header */
+    enum Query_event_post_header_offset {
+        Q_THREAD_ID_OFFSET = 0,
+        Q_EXEC_TIME_OFFSET = 4,
+        Q_DB_LEN_OFFSET = 8,
+        Q_ERR_CODE_OFFSET = 9,
+        Q_STATUS_VARS_LEN_OFFSET = 11,
+        Q_DATA_OFFSET = QUERY_HEADER_LEN
+    };
+
+    /* these are codes, not offsets; not more than 256 values (1 byte). */
+    // related to the event-body
+    enum Query_event_status_vars {
+        Q_FLAGS2_CODE = 0,
+        Q_SQL_MODE_CODE,
+
+        Q_CATALOG_CODE,
+        Q_AUTO_INCREMENT,
+        Q_CHARSET_CODE,
+        Q_TIME_ZONE_CODE,
+
+        Q_CATALOG_NZ_CODE,
+        Q_LC_TIME_NAMES_CODE,
+        Q_CHARSET_DATABASE_CODE,
+        Q_TABLE_MAP_FOR_UPDATE_CODE,
+        /* It is just a placeholder after 8.0.2*/
+        Q_MASTER_DATA_WRITTEN_CODE,
+        Q_INVOKER,
+
+        Q_UPDATED_DB_NAMES,
+        Q_MICROSECONDS,
+
+        Q_COMMIT_TS,
+        Q_COMMIT_TS2,
+
+        Q_EXPLICIT_DEFAULTS_FOR_TIMESTAMP,
+        /*
+          The variable carries xid info of 2pc-aware (recoverable) DDL queries.
+        */
+        Q_DDL_LOGGED_WITH_XID,
+
+        Q_DEFAULT_COLLATION_FOR_UTF8MB4,
+
+        Q_SQL_REQUIRE_PRIMARY_KEY,
+
+        Q_DEFAULT_TABLE_ENCRYPTION
+    };
+    Query_event(const char *query_arg, const char *catalog_arg, const char *db_arg,
+                uint64 ddl_xid_arg, uint32 query_length, uint64 thread_id_arg, int32 errcode,
+                uint64 original_commit_timestamp_arg);
+    ~Query_event() override = default;
+    DISALLOW_COPY(Query_event);
+
+    // ********* impl virtual function *********************
+    /**
+     * @return QUERY_HEADER_LEN + status_vars_len_ + db_len_ + 1 + q_len_.
+     *         The extra 1 is presumably the terminator after the db name —
+     *         TODO confirm against write_data_body_to_buffer().
+     */
+    size_t get_data_size() override
+    {
+        return AbstractEvent::QUERY_HEADER_LEN + status_vars_len_ + db_len_ + 1 + q_len_;
+    }
+    size_t write_data_header_to_buffer(uchar *buffer) override;
+    size_t write_data_body_to_buffer(uchar *buffer) override;
+
+private:
+    void calculate_status_vars_len();
+
+public:
+    // Raw pointers: this header does not show who owns the pointed-to strings.
+    const char *query_;
+    const char *db_;
+    const char *catalog_;
+    /* data members defined in order they are packed and written into the log */
+    uint32_t thread_id_;
+    uint32_t query_exec_time_;
+    size_t db_len_;
+    uint16_t error_code_;
+    uint16_t status_vars_len_;
+    size_t q_len_;
+
+    bool flags2_inited = true;
+    bool sql_mode_inited = true;
+    bool charset_inited = true; // covers the three charset/collation fields below
+
+    uint32_t flags2 = 0;
+    size_t catalog_len = 0; // <= 255 char; 0 means uninited
+
+    // NOTE(review): no default initializer here; relies on the constructor.
+    enum enum_ternary { TERNARY_UNSET, TERNARY_OFF, TERNARY_ON } explicit_defaults_ts;
+
+    // Defined as class member variables.
+    // NOTE(review): the original notes on these three fields disagree with
+    // each other (33 vs 255, utf8mb4_general_ci vs utf8mb4_0900_ai_ci) while
+    // all three values are 255 — confirm the intended ids.
+    uint16 client_charset_ = 255;       // original note: may default to 33 (utf8mb4)
+    uint16 connection_collation_ = 255; // original note: MySQL 8.0 default (utf8mb4_general_ci) utf8mb4_0900_ai_ci
+    uint16 server_collation_ = 255;     // original note: may default to 255 utf8mb4_0900_ai_ci
+
+    /* XID value when the event is a 2pc-capable DDL */
+    uint64 ddl_xid;
+};
diff --git a/binlogconvert/include/events/write_event.h b/binlogconvert/include/events/write_event.h
new file mode 100644
index 0000000000000000000000000000000000000000..68032d111d21478c0e4647dec947d7919f701def
--- /dev/null
+++ b/binlogconvert/include/events/write_event.h
@@ -0,0 +1,339 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include +#include +#include + +#include "common/logging.h" +#include "events/abstract_event.h" +#include "utils/decimal.h" +#include "utils/little_endian.h" +#include "utils/my_time.h" +#include "utils/table_id.h" + +constexpr int TINY_SIZE = 1; +constexpr int SHORT_SIZE = 2; +constexpr int MEDIUMINT_SIZE = 3; +constexpr int INT_SIZE = 4; +constexpr int BIGINT_SIZE = 8; +constexpr int FLOAT_SIZE = 4; + +constexpr int YEAR_PREFIX_SIZE = 1; +constexpr int JSON_PREFIX_SIZE = 4; + +constexpr int DATE_BASE_SIZE = 3; +constexpr int TIME_BASE_SIZE = 3; +constexpr int TIMESTAMP_BASE_SIZE = 4; +constexpr int DATETIME_BASE_SIZE = 5; + +constexpr int TIME_EXTRA_SIZE_0 = 0; +constexpr int TIME_EXTRA_SIZE_1 = 1; +constexpr int TIME_EXTRA_SIZE_2 = 2; +constexpr int TIME_EXTRA_SIZE_3 = 3; +static constexpr int FLOATING_POINT_BUFFER{311 + DECIMAL_NOT_SPECIFIED}; + +class Rows_event : public AbstractEvent { +public: + Rows_event(const Table_id &tid, unsigned long wid, uint16 flag, Log_event_type type, + uint64 original_commit_timestamp_arg); + ~Rows_event() override; + DISALLOW_COPY(Rows_event); + + // ********* impl virtual function ********************* + size_t get_data_size() override { return calculate_event_size(); } + + size_t write_data_header_to_buffer(uchar *buffer) override; + size_t write_data_body_to_buffer(uchar *buffer) override; + + void Set_flags(uint16_t flags) { m_flags = flags; } + + void Set_width(unsigned long width) { m_width = width; } + + int Get_N() + { + int N = (m_width + 7) / 8; + return N; + } + + void cols_init(); + + /* + delete,update + */ + void set_null_before(std::vector &&t) { null_before = std::move(t); } + + /* + insert,update + */ + void set_null_after(std::vector &&t) { null_after = std::move(t); } + + /* + insert,update + */ + void set_rows_after(std::vector &&t) { this->rows_after = std::move(t); } + + /* + delete,update + */ + void set_rows_before(std::vector &&t) { this->rows_before = std::move(t); } + + void setBefore(bool is_before) 
{ m_is_before = is_before; } + + /** + * @brief 每个 row value 连续追加写到 buf 中 + * @param buf + * @param data 实际值 + * @param capacity 目前 buf 已分配的容量 + * @param data_size 使用的大小 + * @param type Field type + * @param length Field 占的 byte 数 + * @param str_length 字符串长度 + * @param precision 精度 + * @param frac 小数点后的位数 + */ + void data_to_binary(std::unique_ptr &buf, uchar *data, size_t &capacity, + size_t &data_size, enum_field_types type, size_t length, size_t str_length, + int precision, int frac) + { + switch (type) { + // 固定长度类型 + case enum_field_types::MYSQL_TYPE_TINY: + handle_fixed_length(buf, data, capacity, data_size, TINY_SIZE); + break; + case enum_field_types::MYSQL_TYPE_SHORT: + handle_fixed_length(buf, data, capacity, data_size, SHORT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_LONG: + handle_fixed_length(buf, data, capacity, data_size, INT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_LONGLONG: + case enum_field_types::MYSQL_TYPE_DOUBLE: + handle_fixed_length(buf, data, capacity, data_size, BIGINT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_INT24: + handle_fixed_length(buf, data, capacity, data_size, MEDIUMINT_SIZE); + break; + case enum_field_types::MYSQL_TYPE_FLOAT: + handle_fixed_length(buf, data, capacity, data_size, FLOAT_SIZE); + break; + + // 字符串类型 + case enum_field_types::MYSQL_TYPE_VARCHAR: + case enum_field_types::MYSQL_TYPE_STRING: + handle_string_type(buf, data, capacity, data_size, length, str_length); + break; + + // binary 类型 + case enum_field_types::MYSQL_TYPE_TINY_BLOB: + case enum_field_types::MYSQL_TYPE_MEDIUM_BLOB: + case enum_field_types::MYSQL_TYPE_BLOB: + case enum_field_types::MYSQL_TYPE_LONG_BLOB: + handle_prefixed_binary(buf, data, capacity, data_size, length, str_length); + break; + case enum_field_types::MYSQL_TYPE_JSON: + handle_prefixed_binary(buf, data, capacity, data_size, JSON_PREFIX_SIZE, + str_length); + break; + + // enum, set, bit类型 + case enum_field_types::MYSQL_TYPE_ENUM: + case 
enum_field_types::MYSQL_TYPE_SET: + handle_fixed_length(buf, data, capacity, data_size, length); + break; + case enum_field_types::MYSQL_TYPE_BIT: { + std::reverse(data, data + length); + handle_fixed_length(buf, data, capacity, data_size, length); + break; + } + + case enum_field_types::MYSQL_TYPE_NEWDECIMAL: { + decimal_t t; + size_t demi_size = dig2bytes[(precision - frac) % 9] + ((precision - frac) / 9) * 4 + + dig2bytes[frac % 9] + (frac / 9) * 4; + const char *from = reinterpret_cast(data); + const char *end_ptr = reinterpret_cast(data + str_length); + t.buf = new int32_t[precision / 9 + precision % 9]; + + string2decimal(from, &t, &end_ptr); + + buf_resize(buf, capacity, data_size, data_size + demi_size); + decimal2bin(&t, buf.get() + data_size, precision, frac); + data_size += demi_size; + delete[] t.buf; + break; + } + + // 时间类型 + case enum_field_types::MYSQL_TYPE_YEAR: + handle_fixed_length(buf, data, capacity, data_size, YEAR_PREFIX_SIZE); + break; + case enum_field_types::MYSQL_TYPE_DATE: + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, + int_to_date, [](MYSQL_TIME *ltime, uchar *dst, int prec) { + long tmp = + ltime->day + ltime->month * 32 + ltime->year * 16 * 32; + int3store(dst, tmp); + }); + break; + case enum_field_types::MYSQL_TYPE_TIME: { + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, + double_to_time, [](MYSQL_TIME *ltime, uchar *dst, int prec) { + longlong nr = TIME_to_longlong_time_packed(*ltime); + my_time_packed_to_binary(nr, dst, prec); + }); + break; + } + // datetime 和 timestamp 的 precision 传递的是 pack_len + case enum_field_types::MYSQL_TYPE_DATETIME: { + handle_time_type(buf, data, capacity, data_size, str_length, length, precision, + str_to_datetime, [](MYSQL_TIME *ltime, uchar *dst, int prec) { + longlong nr = TIME_to_longlong_datetime_packed(*ltime); + my_datetime_packed_to_binary(nr, dst, prec); + }); + break; + } + + case enum_field_types::MYSQL_TYPE_TIMESTAMP2: { + 
handle_time_type(buf, data, capacity, data_size, str_length, length, precision, + str_to_datetime, [](MYSQL_TIME *ltime, uchar *dst, int prec) { + my_timeval val; + ltime->hour += 8; + datetime_to_timeval(ltime, &val); + // val.m_tv_sec += 28800; + my_timestamp_to_binary(&val, dst, prec); + }); + break; + } + + default: + break; + } + } + + void write_data_before(uchar *data, enum_field_types type, size_t length = 0, + size_t str_length = 0, int precision = 0, int frac = 0) + { + data_to_binary(m_rows_before_buf, data, m_before_capacity, before_data_size_used, type, + length, str_length, precision, frac); + } + + void write_data_after(uchar *data, enum_field_types type, size_t length = 0, + size_t str_length = 0, int precision = 0, int frac = 0) + { + data_to_binary(m_rows_after_buf, data, m_after_capacity, after_data_size_used, type, length, + str_length, precision, frac); + } + + /** + * @brief 统一Rows event 的数据写入接口,被不同类型的 handler 调用 + */ + void writeData(uchar *data, enum_field_types type, size_t length = 0, size_t str_length = 0, + int precision = 0, int frac = 0) + { + if (m_is_before) { + write_data_before(data, type, length, str_length, precision, frac); + } else { + write_data_after(data, type, length, str_length, precision, frac); + } + } + +private: + /** + * @brief 动态申请额外的内存空间,避免每次都重新分配内存,再拷贝进去 + */ + void buf_resize(std::unique_ptr &buf, size_t &capacity, size_t current_size, + size_t needed_size); + + void double2demi(double num, decimal_t &t, int precision, int frac); + + size_t calculate_event_size(); + + /** + * @brief 处理固定长度类型 TINYINT/SHORT/INT/LONGLONG/FLOAT/DOUBLE/YEAR/DATE + */ + inline void handle_fixed_length(std::unique_ptr &buf, void *data, size_t &capacity, + size_t &data_size, size_t bytes) + { + buf_resize(buf, capacity, data_size, data_size + bytes); + memcpy(buf.get() + data_size, data, bytes); + data_size += bytes; + } + + /** + * @brief 处理变长字符串类型 CHAR/VARCHAR + */ + inline void handle_string_type(std::unique_ptr &buf, void *data, 
size_t &capacity, + size_t &data_size, size_t length, size_t str_length) + { + size_t len_bytes = length > 255 ? 2 : 1; + buf_resize(buf, capacity, data_size, data_size + str_length + len_bytes); + memcpy(buf.get() + data_size, &str_length, len_bytes); + data_size += len_bytes; + memcpy(buf.get() + data_size, data, str_length); + data_size += str_length; + } + + /** + * @brief 处理带长度前缀的二进制数据 TEXT/BLOB/JSON + */ + inline void handle_prefixed_binary(std::unique_ptr &buf, void *data, size_t &capacity, + size_t &data_size, size_t prefix_size, size_t str_length) + { + buf_resize(buf, capacity, data_size, data_size + str_length + prefix_size); + memcpy(buf.get() + data_size, &str_length, prefix_size); + data_size += prefix_size; + memcpy(buf.get() + data_size, data, str_length); + data_size += str_length; + } + + /** + * @brief 处理带时间类型 TIME/DATETIME/TIMESTAMP + */ + template + inline void handle_time_type(std::unique_ptr &buf, void *data, size_t &capacity, + size_t &data_size, size_t str_length, size_t length, int precision, + ParseFunc parse_func, ConvertFunc convert_func) + { + // 1. 计算时间字段所需的总字节数 + buf_resize(buf, capacity, data_size, data_size + length); + + // 2. 将字符串转换为时间对象 + MYSQL_TIME ltime; + parse_func(static_cast(data), str_length, <ime); + + // 3. 
将时间对象转换为二进制表示 + convert_func(<ime, buf.get() + data_size, precision); + + data_size += length; + } + +private: + Table_id m_table_id; + uint16 m_flags; /** Flags for row-level events */ + Log_event_type m_type; + unsigned long m_width; + + std::unique_ptr columns_before_image; + std::unique_ptr columns_after_image; + std::unique_ptr row_bitmap_before; + std::unique_ptr row_bitmap_after; + + std::unique_ptr m_rows_before_buf; + std::unique_ptr m_rows_after_buf; + size_t m_before_capacity; // 当前已分配的容量 + size_t m_after_capacity; + size_t before_data_size_used; // 实际使用的大小 + size_t after_data_size_used; + + std::vector rows_before; + std::vector rows_after; + std::vector null_after; + std::vector null_before; + + bool m_is_before; +}; diff --git a/binlogconvert/include/format/ddl_generated.h b/binlogconvert/include/format/ddl_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..d607660a9a92c66408af61f64dc25aff7a29dd09 --- /dev/null +++ b/binlogconvert/include/format/ddl_generated.h @@ -0,0 +1,254 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_DDL_LOFT_H_ +#define FLATBUFFERS_GENERATED_DDL_LOFT_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace loft { + +struct DDL; +struct DDLBuilder; + +struct DDL FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DDLBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_CHECK_POINT = 4, + VT_DB_NAME = 6, + VT_DDL_SQL = 8, + VT_DDL_TYPE = 10, + VT_LAST_COMMIT = 12, + VT_LSN = 14, + VT_MSG_TIME = 16, + VT_OP_TYPE = 18, + VT_SCN = 20, + VT_SEQ = 22, + VT_TABLE_ = 24, + VT_TX_SEQ = 26, + VT_TX_TIME = 28 + }; + + const ::flatbuffers::String *check_point() const + { + return GetPointer(VT_CHECK_POINT); + } + + const ::flatbuffers::String *db_name() const + { + return GetPointer(VT_DB_NAME); + } + + const ::flatbuffers::String *ddl_sql() const + { + return GetPointer(VT_DDL_SQL); + } + + const 
::flatbuffers::String *ddl_type() const + { + return GetPointer(VT_DDL_TYPE); + } + + int64_t last_commit() const { return GetField(VT_LAST_COMMIT, 0); } + + int64_t lsn() const { return GetField(VT_LSN, 0); } + + const ::flatbuffers::String *msg_time() const + { + return GetPointer(VT_MSG_TIME); + } + + const ::flatbuffers::String *op_type() const + { + return GetPointer(VT_OP_TYPE); + } + + int64_t scn() const { return GetField(VT_SCN, 0); } + + int64_t seq() const { return GetField(VT_SEQ, 0); } + + const ::flatbuffers::String *table_() const + { + return GetPointer(VT_TABLE_); + } + + int64_t tx_seq() const { return GetField(VT_TX_SEQ, 0); } + + const ::flatbuffers::String *tx_time() const + { + return GetPointer(VT_TX_TIME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CHECK_POINT) && + verifier.VerifyString(check_point()) && VerifyOffset(verifier, VT_DB_NAME) && + verifier.VerifyString(db_name()) && VerifyOffset(verifier, VT_DDL_SQL) && + verifier.VerifyString(ddl_sql()) && VerifyOffset(verifier, VT_DDL_TYPE) && + verifier.VerifyString(ddl_type()) && + VerifyField(verifier, VT_LAST_COMMIT, 8) && + VerifyField(verifier, VT_LSN, 8) && VerifyOffset(verifier, VT_MSG_TIME) && + verifier.VerifyString(msg_time()) && VerifyOffset(verifier, VT_OP_TYPE) && + verifier.VerifyString(op_type()) && VerifyField(verifier, VT_SCN, 8) && + VerifyField(verifier, VT_SEQ, 8) && VerifyOffset(verifier, VT_TABLE_) && + verifier.VerifyString(table_()) && VerifyField(verifier, VT_TX_SEQ, 8) && + VerifyOffset(verifier, VT_TX_TIME) && verifier.VerifyString(tx_time()) && + verifier.EndTable(); + } +}; + +struct DDLBuilder { + typedef DDL Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_check_point(::flatbuffers::Offset<::flatbuffers::String> check_point) + { + fbb_.AddOffset(DDL::VT_CHECK_POINT, check_point); + } + + void 
add_db_name(::flatbuffers::Offset<::flatbuffers::String> db_name) + { + fbb_.AddOffset(DDL::VT_DB_NAME, db_name); + } + + void add_ddl_sql(::flatbuffers::Offset<::flatbuffers::String> ddl_sql) + { + fbb_.AddOffset(DDL::VT_DDL_SQL, ddl_sql); + } + + void add_ddl_type(::flatbuffers::Offset<::flatbuffers::String> ddl_type) + { + fbb_.AddOffset(DDL::VT_DDL_TYPE, ddl_type); + } + + void add_last_commit(int64_t last_commit) + { + fbb_.AddElement(DDL::VT_LAST_COMMIT, last_commit, 0); + } + + void add_lsn(int64_t lsn) { fbb_.AddElement(DDL::VT_LSN, lsn, 0); } + + void add_msg_time(::flatbuffers::Offset<::flatbuffers::String> msg_time) + { + fbb_.AddOffset(DDL::VT_MSG_TIME, msg_time); + } + + void add_op_type(::flatbuffers::Offset<::flatbuffers::String> op_type) + { + fbb_.AddOffset(DDL::VT_OP_TYPE, op_type); + } + + void add_scn(int64_t scn) { fbb_.AddElement(DDL::VT_SCN, scn, 0); } + + void add_seq(int64_t seq) { fbb_.AddElement(DDL::VT_SEQ, seq, 0); } + + void add_table_(::flatbuffers::Offset<::flatbuffers::String> table_) + { + fbb_.AddOffset(DDL::VT_TABLE_, table_); + } + + void add_tx_seq(int64_t tx_seq) { fbb_.AddElement(DDL::VT_TX_SEQ, tx_seq, 0); } + + void add_tx_time(::flatbuffers::Offset<::flatbuffers::String> tx_time) + { + fbb_.AddOffset(DDL::VT_TX_TIME, tx_time); + } + + explicit DDLBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreateDDL(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> check_point = 0, + ::flatbuffers::Offset<::flatbuffers::String> db_name = 0, + ::flatbuffers::Offset<::flatbuffers::String> ddl_sql = 0, + ::flatbuffers::Offset<::flatbuffers::String> ddl_type = 0, int64_t last_commit = 0, + int64_t lsn = 0, ::flatbuffers::Offset<::flatbuffers::String> msg_time = 0, + 
::flatbuffers::Offset<::flatbuffers::String> op_type = 0, int64_t scn = 0, + int64_t seq = 0, ::flatbuffers::Offset<::flatbuffers::String> table_ = 0, + int64_t tx_seq = 0, ::flatbuffers::Offset<::flatbuffers::String> tx_time = 0) +{ + DDLBuilder builder_(_fbb); + builder_.add_tx_seq(tx_seq); + builder_.add_seq(seq); + builder_.add_scn(scn); + builder_.add_lsn(lsn); + builder_.add_last_commit(last_commit); + builder_.add_tx_time(tx_time); + builder_.add_table_(table_); + builder_.add_op_type(op_type); + builder_.add_msg_time(msg_time); + builder_.add_ddl_type(ddl_type); + builder_.add_ddl_sql(ddl_sql); + builder_.add_db_name(db_name); + builder_.add_check_point(check_point); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreateDDLDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *check_point = nullptr, + const char *db_name = nullptr, const char *ddl_sql = nullptr, + const char *ddl_type = nullptr, int64_t last_commit = 0, int64_t lsn = 0, + const char *msg_time = nullptr, const char *op_type = nullptr, int64_t scn = 0, + int64_t seq = 0, const char *table_ = nullptr, int64_t tx_seq = 0, + const char *tx_time = nullptr) +{ + auto check_point__ = check_point ? _fbb.CreateString(check_point) : 0; + auto db_name__ = db_name ? _fbb.CreateString(db_name) : 0; + auto ddl_sql__ = ddl_sql ? _fbb.CreateString(ddl_sql) : 0; + auto ddl_type__ = ddl_type ? _fbb.CreateString(ddl_type) : 0; + auto msg_time__ = msg_time ? _fbb.CreateString(msg_time) : 0; + auto op_type__ = op_type ? _fbb.CreateString(op_type) : 0; + auto table___ = table_ ? _fbb.CreateString(table_) : 0; + auto tx_time__ = tx_time ? 
_fbb.CreateString(tx_time) : 0; + return loft::CreateDDL(_fbb, check_point__, db_name__, ddl_sql__, ddl_type__, last_commit, lsn, + msg_time__, op_type__, scn, seq, table___, tx_seq, tx_time__); +} + +inline const loft::DDL *GetDDL(const void *buf) +{ + return ::flatbuffers::GetRoot(buf); +} + +inline const loft::DDL *GetSizePrefixedDDL(const void *buf) +{ + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyDDLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedDDLBuffer(::flatbuffers::Verifier &verifier) +{ + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishDDLBuffer(::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) +{ + fbb.Finish(root); +} + +inline void FinishSizePrefixedDDLBuffer(::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) +{ + fbb.FinishSizePrefixed(root); +} + +} // namespace loft + +#endif // FLATBUFFERS_GENERATED_DDL_LOFT_H_ diff --git a/binlogconvert/include/format/dml_generated.h b/binlogconvert/include/format/dml_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..a3c4a8aa2199837fbc2a0d0105a46a4e1e27bf33 --- /dev/null +++ b/binlogconvert/include/format/dml_generated.h @@ -0,0 +1,883 @@ +// automatically generated by the FlatBuffers compiler, do not modify + +#ifndef FLATBUFFERS_GENERATED_DML_LOFT_H_ +#define FLATBUFFERS_GENERATED_DML_LOFT_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace loft { + +struct FieldMeta; +struct FieldMetaBuilder; + +struct Field; +struct FieldBuilder; + +struct LongVal; +struct LongValBuilder; + +struct DoubleVal; +struct DoubleValBuilder; + +struct StringVal; +struct StringValBuilder; + +struct kvPair; +struct kvPairBuilder; + +struct DML; +struct DMLBuilder; + +enum DataType : int8_t { + DataType_None = 0, + DataType_LongData = 1, + DataType_DoubleData = 2, + DataType_StringData = 3, + DataType_MIN = DataType_None, + DataType_MAX 
= DataType_StringData +}; + +inline const DataType (&EnumValuesDataType())[4] +{ + static const DataType values[] = {DataType_None, DataType_LongData, DataType_DoubleData, + DataType_StringData}; + return values; +} + +inline const char *const *EnumNamesDataType() +{ + static const char *const names[5] = {"None", "LongData", "DoubleData", "StringData", nullptr}; + return names; +} + +inline const char *EnumNameDataType(DataType e) +{ + if (::flatbuffers::IsOutRange(e, DataType_None, DataType_StringData)) { + return ""; + } + const size_t index = static_cast(e); + return EnumNamesDataType()[index]; +} + +enum DataMeta : uint8_t { + DataMeta_NONE = 0, + DataMeta_LongVal = 1, + DataMeta_DoubleVal = 2, + DataMeta_StringVal = 3, + DataMeta_MIN = DataMeta_NONE, + DataMeta_MAX = DataMeta_StringVal +}; + +inline const DataMeta (&EnumValuesDataMeta())[4] +{ + static const DataMeta values[] = {DataMeta_NONE, DataMeta_LongVal, DataMeta_DoubleVal, + DataMeta_StringVal}; + return values; +} + +inline const char *const *EnumNamesDataMeta() +{ + static const char *const names[5] = {"NONE", "LongVal", "DoubleVal", "StringVal", nullptr}; + return names; +} + +inline const char *EnumNameDataMeta(DataMeta e) +{ + if (::flatbuffers::IsOutRange(e, DataMeta_NONE, DataMeta_StringVal)) { + return ""; + } + const size_t index = static_cast(e); + return EnumNamesDataMeta()[index]; +} + +template struct DataMetaTraits { + static const DataMeta enum_value = DataMeta_NONE; +}; + +template <> struct DataMetaTraits { + static const DataMeta enum_value = DataMeta_LongVal; +}; + +template <> struct DataMetaTraits { + static const DataMeta enum_value = DataMeta_DoubleVal; +}; + +template <> struct DataMetaTraits { + static const DataMeta enum_value = DataMeta_StringVal; +}; + +bool VerifyDataMeta(::flatbuffers::Verifier &verifier, const void *obj, DataMeta type); +bool VerifyDataMetaVector(::flatbuffers::Verifier &verifier, + const ::flatbuffers::Vector<::flatbuffers::Offset> *values, + const 
::flatbuffers::Vector *types); + +struct FieldMeta FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FieldMetaBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DATA_TYPE = 4, + VT_LENGTH = 6, + VT_PRECISION = 8, + VT_IS_UNSIGNED = 10, + VT_NULLABLE = 12, + VT_CSNAME = 14 + }; + + const ::flatbuffers::String *data_type() const + { + return GetPointer(VT_DATA_TYPE); + } + + int32_t length() const { return GetField(VT_LENGTH, 0); } + + int32_t precision() const { return GetField(VT_PRECISION, 0); } + + bool is_unsigned() const { return GetField(VT_IS_UNSIGNED, 0) != 0; } + + bool nullable() const { return GetField(VT_NULLABLE, 0) != 0; } + + const ::flatbuffers::String *csname() const + { + return GetPointer(VT_CSNAME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA_TYPE) && + verifier.VerifyString(data_type()) && VerifyField(verifier, VT_LENGTH, 4) && + VerifyField(verifier, VT_PRECISION, 4) && + VerifyField(verifier, VT_IS_UNSIGNED, 1) && + VerifyField(verifier, VT_NULLABLE, 1) && + VerifyOffset(verifier, VT_CSNAME) && verifier.VerifyString(csname()) && + verifier.EndTable(); + } +}; + +struct FieldMetaBuilder { + typedef FieldMeta Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_data_type(::flatbuffers::Offset<::flatbuffers::String> data_type) + { + fbb_.AddOffset(FieldMeta::VT_DATA_TYPE, data_type); + } + + void add_length(int32_t length) { fbb_.AddElement(FieldMeta::VT_LENGTH, length, 0); } + + void add_precision(int32_t precision) + { + fbb_.AddElement(FieldMeta::VT_PRECISION, precision, 0); + } + + void add_is_unsigned(bool is_unsigned) + { + fbb_.AddElement(FieldMeta::VT_IS_UNSIGNED, static_cast(is_unsigned), 0); + } + + void add_nullable(bool nullable) + { + fbb_.AddElement(FieldMeta::VT_NULLABLE, static_cast(nullable), 0); + } + + void 
add_csname(::flatbuffers::Offset<::flatbuffers::String> csname) + { + fbb_.AddOffset(FieldMeta::VT_CSNAME, csname); + } + + explicit FieldMetaBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreateFieldMeta(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> data_type = 0, int32_t length = 0, + int32_t precision = 0, bool is_unsigned = false, bool nullable = false, + ::flatbuffers::Offset<::flatbuffers::String> csname = 0) +{ + FieldMetaBuilder builder_(_fbb); + builder_.add_csname(csname); + builder_.add_precision(precision); + builder_.add_length(length); + builder_.add_data_type(data_type); + builder_.add_nullable(nullable); + builder_.add_is_unsigned(is_unsigned); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreateFieldMetaDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *data_type = nullptr, + int32_t length = 0, int32_t precision = 0, bool is_unsigned = false, + bool nullable = false, const char *csname = nullptr) +{ + auto data_type__ = data_type ? _fbb.CreateString(data_type) : 0; + auto csname__ = csname ? 
_fbb.CreateString(csname) : 0; + return loft::CreateFieldMeta(_fbb, data_type__, length, precision, is_unsigned, nullable, + csname__); +} + +struct Field FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FieldBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NAME = 4, VT_META = 6 }; + + const ::flatbuffers::String *name() const + { + return GetPointer(VT_NAME); + } + + const loft::FieldMeta *meta() const { return GetPointer(VT_META); } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && VerifyOffset(verifier, VT_META) && + verifier.VerifyTable(meta()) && verifier.EndTable(); + } +}; + +struct FieldBuilder { + typedef Field Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) + { + fbb_.AddOffset(Field::VT_NAME, name); + } + + void add_meta(::flatbuffers::Offset meta) + { + fbb_.AddOffset(Field::VT_META, meta); + } + + explicit FieldBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreateField(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset meta = 0) +{ + FieldBuilder builder_(_fbb); + builder_.add_meta(meta); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreateFieldDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *name = nullptr, + ::flatbuffers::Offset meta = 0) +{ + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return loft::CreateField(_fbb, name__, meta); +} + +struct LongVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LongValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUE = 4 }; + + int64_t value() const { return GetField(VT_VALUE, 0); } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VALUE, 8) && + verifier.EndTable(); + } +}; + +struct LongValBuilder { + typedef LongVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(int64_t value) { fbb_.AddElement(LongVal::VT_VALUE, value, 0); } + + explicit LongValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLongVal(::flatbuffers::FlatBufferBuilder &_fbb, + int64_t value = 0) +{ + LongValBuilder builder_(_fbb); + builder_.add_value(value); + return builder_.Finish(); +} + +struct DoubleVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DoubleValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUE = 4 }; + + double value() const { return GetField(VT_VALUE, 0.0); } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyField(verifier, VT_VALUE, 8) && + verifier.EndTable(); + } +}; + +struct DoubleValBuilder { + typedef DoubleVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(double value) { fbb_.AddElement(DoubleVal::VT_VALUE, value, 0.0); } + + explicit DoubleValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const 
auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDoubleVal(::flatbuffers::FlatBufferBuilder &_fbb, + double value = 0.0) +{ + DoubleValBuilder builder_(_fbb); + builder_.add_value(value); + return builder_.Finish(); +} + +struct StringVal FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef StringValBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_VALUE = 4 }; + + const ::flatbuffers::String *value() const + { + return GetPointer(VT_VALUE); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_VALUE) && + verifier.VerifyString(value()) && verifier.EndTable(); + } +}; + +struct StringValBuilder { + typedef StringVal Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_value(::flatbuffers::Offset<::flatbuffers::String> value) + { + fbb_.AddOffset(StringVal::VT_VALUE, value); + } + + explicit StringValBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreateStringVal(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> value = 0) +{ + StringValBuilder builder_(_fbb); + builder_.add_value(value); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreateStringValDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *value = nullptr) +{ + auto value__ = value ? 
/// Read-only view over a serialized key/value table: a string key plus a
/// tagged payload whose concrete table type is selected by value_type().
/// NOTE(review): several template argument lists (GetPointer, GetField,
/// static_cast, VerifyField, `template ... value_as`) are missing in this copy
/// of the patch — confirm against the real generated header.
struct kvPair FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
    typedef kvPairBuilder Builder;

    /// Vtable slot offsets for the three fields of this table.
    enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
        VT_KEY = 4,
        VT_VALUE_TYPE = 6,
        VT_VALUE = 8
    };

    /// Key string read from slot VT_KEY.
    const ::flatbuffers::String *key() const
    {
        return GetPointer(VT_KEY);
    }

    /// Payload tag read from slot VT_VALUE_TYPE; the field's default is 0 (DataMeta_NONE).
    loft::DataMeta value_type() const
    {
        return static_cast(GetField(VT_VALUE_TYPE, 0));
    }

    /// Untyped payload pointer read from slot VT_VALUE; interpret it via value_type().
    const void *value() const { return GetPointer(VT_VALUE); }

    /// Typed payload accessor; specializations are declared after this struct.
    template const T *value_as() const;

    /// Payload as LongVal when the tag is DataMeta_LongVal, otherwise nullptr.
    const loft::LongVal *value_as_LongVal() const
    {
        return value_type() == loft::DataMeta_LongVal ? static_cast(value())
                                                      : nullptr;
    }

    /// Payload as DoubleVal when the tag is DataMeta_DoubleVal, otherwise nullptr.
    const loft::DoubleVal *value_as_DoubleVal() const
    {
        return value_type() == loft::DataMeta_DoubleVal
                   ? static_cast(value())
                   : nullptr;
    }

    /// Payload as StringVal when the tag is DataMeta_StringVal, otherwise nullptr.
    const loft::StringVal *value_as_StringVal() const
    {
        return value_type() == loft::DataMeta_StringVal
                   ? static_cast(value())
                   : nullptr;
    }

    /// Checks the table header, the key string, the 1-byte tag field, the
    /// payload offset, and finally the payload itself through VerifyDataMeta
    /// using the stored tag. Returns false at the first failed check.
    bool Verify(::flatbuffers::Verifier &verifier) const
    {
        return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_KEY) &&
               verifier.VerifyString(key()) && VerifyField(verifier, VT_VALUE_TYPE, 1) &&
               VerifyOffset(verifier, VT_VALUE) &&
               VerifyDataMeta(verifier, value(), value_type()) && verifier.EndTable();
    }
};
fbb_.AddElement(kvPair::VT_VALUE_TYPE, static_cast(value_type), 0); + } + + void add_value(::flatbuffers::Offset value) { fbb_.AddOffset(kvPair::VT_VALUE, value); } + + explicit kvPairBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset +CreatekvPair(::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> key = 0, + loft::DataMeta value_type = loft::DataMeta_NONE, ::flatbuffers::Offset value = 0) +{ + kvPairBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_key(key); + builder_.add_value_type(value_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreatekvPairDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *key = nullptr, + loft::DataMeta value_type = loft::DataMeta_NONE, + ::flatbuffers::Offset value = 0) +{ + auto key__ = key ? 
_fbb.CreateString(key) : 0; + return loft::CreatekvPair(_fbb, key__, value_type, value); +} + +struct DML FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DMLBuilder Builder; + + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_CHECK_POINT = 4, + VT_DB_NAME = 6, + VT_DN = 8, + VT_FIELDS = 10, + VT_KEYS = 12, + VT_LAST_COMMIT = 14, + VT_LSN = 16, + VT_MSG_TIME = 18, + VT_NEW_DATA = 20, + VT_OLD_DATA = 22, + VT_OP_TYPE = 24, + VT_SCN = 26, + VT_SEQ = 28, + VT_TABLE_ = 30, + VT_TX_SEQ = 32, + VT_TX_TIME = 34 + }; + + const ::flatbuffers::String *check_point() const + { + return GetPointer(VT_CHECK_POINT); + } + + const ::flatbuffers::String *db_name() const + { + return GetPointer(VT_DB_NAME); + } + + int16_t dn() const { return GetField(VT_DN, 0); } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *fields() const + { + return GetPointer> *>( + VT_FIELDS); + } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *keys() const + { + return GetPointer> *>( + VT_KEYS); + } + + int64_t last_commit() const { return GetField(VT_LAST_COMMIT, 0); } + + int64_t lsn() const { return GetField(VT_LSN, 0); } + + const ::flatbuffers::String *msg_time() const + { + return GetPointer(VT_MSG_TIME); + } + + const ::flatbuffers::Vector<::flatbuffers::Offset> *new_data() const + { + return GetPointer> *>( + VT_NEW_DATA); + } + + int32_t old_data() const { return GetField(VT_OLD_DATA, 0); } + + const ::flatbuffers::String *op_type() const + { + return GetPointer(VT_OP_TYPE); + } + + int64_t scn() const { return GetField(VT_SCN, 0); } + + int64_t seq() const { return GetField(VT_SEQ, 0); } + + const ::flatbuffers::String *table_() const + { + return GetPointer(VT_TABLE_); + } + + int64_t tx_seq() const { return GetField(VT_TX_SEQ, 0); } + + const ::flatbuffers::String *tx_time() const + { + return GetPointer(VT_TX_TIME); + } + + bool Verify(::flatbuffers::Verifier &verifier) const + { + return VerifyTableStart(verifier) && 
VerifyOffset(verifier, VT_CHECK_POINT) && + verifier.VerifyString(check_point()) && VerifyOffset(verifier, VT_DB_NAME) && + verifier.VerifyString(db_name()) && VerifyField(verifier, VT_DN, 2) && + VerifyOffset(verifier, VT_FIELDS) && verifier.VerifyVector(fields()) && + verifier.VerifyVectorOfTables(fields()) && VerifyOffset(verifier, VT_KEYS) && + verifier.VerifyVector(keys()) && verifier.VerifyVectorOfTables(keys()) && + VerifyField(verifier, VT_LAST_COMMIT, 8) && + VerifyField(verifier, VT_LSN, 8) && VerifyOffset(verifier, VT_MSG_TIME) && + verifier.VerifyString(msg_time()) && VerifyOffset(verifier, VT_NEW_DATA) && + verifier.VerifyVector(new_data()) && verifier.VerifyVectorOfTables(new_data()) && + VerifyField(verifier, VT_OLD_DATA, 4) && + VerifyOffset(verifier, VT_OP_TYPE) && verifier.VerifyString(op_type()) && + VerifyField(verifier, VT_SCN, 8) && + VerifyField(verifier, VT_SEQ, 8) && VerifyOffset(verifier, VT_TABLE_) && + verifier.VerifyString(table_()) && VerifyField(verifier, VT_TX_SEQ, 8) && + VerifyOffset(verifier, VT_TX_TIME) && verifier.VerifyString(tx_time()) && + verifier.EndTable(); + } +}; + +struct DMLBuilder { + typedef DML Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + + void add_check_point(::flatbuffers::Offset<::flatbuffers::String> check_point) + { + fbb_.AddOffset(DML::VT_CHECK_POINT, check_point); + } + + void add_db_name(::flatbuffers::Offset<::flatbuffers::String> db_name) + { + fbb_.AddOffset(DML::VT_DB_NAME, db_name); + } + + void add_dn(int16_t dn) { fbb_.AddElement(DML::VT_DN, dn, 0); } + + void add_fields( + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> fields) + { + fbb_.AddOffset(DML::VT_FIELDS, fields); + } + + void + add_keys(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> keys) + { + fbb_.AddOffset(DML::VT_KEYS, keys); + } + + void add_last_commit(int64_t last_commit) + { + fbb_.AddElement(DML::VT_LAST_COMMIT, last_commit, 0); + } + + void 
add_lsn(int64_t lsn) { fbb_.AddElement(DML::VT_LSN, lsn, 0); } + + void add_msg_time(::flatbuffers::Offset<::flatbuffers::String> msg_time) + { + fbb_.AddOffset(DML::VT_MSG_TIME, msg_time); + } + + void add_new_data( + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> new_data) + { + fbb_.AddOffset(DML::VT_NEW_DATA, new_data); + } + + void add_old_data(int32_t old_data) { fbb_.AddElement(DML::VT_OLD_DATA, old_data, 0); } + + void add_op_type(::flatbuffers::Offset<::flatbuffers::String> op_type) + { + fbb_.AddOffset(DML::VT_OP_TYPE, op_type); + } + + void add_scn(int64_t scn) { fbb_.AddElement(DML::VT_SCN, scn, 0); } + + void add_seq(int64_t seq) { fbb_.AddElement(DML::VT_SEQ, seq, 0); } + + void add_table_(::flatbuffers::Offset<::flatbuffers::String> table_) + { + fbb_.AddOffset(DML::VT_TABLE_, table_); + } + + void add_tx_seq(int64_t tx_seq) { fbb_.AddElement(DML::VT_TX_SEQ, tx_seq, 0); } + + void add_tx_time(::flatbuffers::Offset<::flatbuffers::String> tx_time) + { + fbb_.AddOffset(DML::VT_TX_TIME, tx_time); + } + + explicit DMLBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) + { + start_ = fbb_.StartTable(); + } + + ::flatbuffers::Offset Finish() + { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDML( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> check_point = 0, + ::flatbuffers::Offset<::flatbuffers::String> db_name = 0, int16_t dn = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> fields = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> keys = 0, + int64_t last_commit = 0, int64_t lsn = 0, + ::flatbuffers::Offset<::flatbuffers::String> msg_time = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> new_data = 0, + int32_t old_data = 0, ::flatbuffers::Offset<::flatbuffers::String> op_type = 0, int64_t scn = 0, + int64_t seq = 0, 
::flatbuffers::Offset<::flatbuffers::String> table_ = 0, int64_t tx_seq = 0, + ::flatbuffers::Offset<::flatbuffers::String> tx_time = 0) +{ + DMLBuilder builder_(_fbb); + builder_.add_tx_seq(tx_seq); + builder_.add_seq(seq); + builder_.add_scn(scn); + builder_.add_lsn(lsn); + builder_.add_last_commit(last_commit); + builder_.add_tx_time(tx_time); + builder_.add_table_(table_); + builder_.add_op_type(op_type); + builder_.add_old_data(old_data); + builder_.add_new_data(new_data); + builder_.add_msg_time(msg_time); + builder_.add_keys(keys); + builder_.add_fields(fields); + builder_.add_db_name(db_name); + builder_.add_check_point(check_point); + builder_.add_dn(dn); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset +CreateDMLDirect(::flatbuffers::FlatBufferBuilder &_fbb, const char *check_point = nullptr, + const char *db_name = nullptr, int16_t dn = 0, + const std::vector<::flatbuffers::Offset> *fields = nullptr, + const std::vector<::flatbuffers::Offset> *keys = nullptr, + int64_t last_commit = 0, int64_t lsn = 0, const char *msg_time = nullptr, + const std::vector<::flatbuffers::Offset> *new_data = nullptr, + int32_t old_data = 0, const char *op_type = nullptr, int64_t scn = 0, + int64_t seq = 0, const char *table_ = nullptr, int64_t tx_seq = 0, + const char *tx_time = nullptr) +{ + auto check_point__ = check_point ? _fbb.CreateString(check_point) : 0; + auto db_name__ = db_name ? _fbb.CreateString(db_name) : 0; + auto fields__ = fields ? _fbb.CreateVector<::flatbuffers::Offset>(*fields) : 0; + auto keys__ = keys ? _fbb.CreateVector<::flatbuffers::Offset>(*keys) : 0; + auto msg_time__ = msg_time ? _fbb.CreateString(msg_time) : 0; + auto new_data__ = + new_data ? _fbb.CreateVector<::flatbuffers::Offset>(*new_data) : 0; + auto op_type__ = op_type ? _fbb.CreateString(op_type) : 0; + auto table___ = table_ ? _fbb.CreateString(table_) : 0; + auto tx_time__ = tx_time ? 
/// Verifies a tagged payload: `obj` is checked as the table type selected by `type`.
/// DataMeta_NONE is accepted without inspecting `obj`, and so is any tag value
/// not listed below (the default branch returns true).
/// NOTE(review): the reinterpret_cast target types are missing in this copy of
/// the patch — confirm against the real generated header.
inline bool VerifyDataMeta(::flatbuffers::Verifier &verifier, const void *obj, DataMeta type)
{
    switch (type) {
        case DataMeta_NONE: {
            return true;
        }
        case DataMeta_LongVal: {
            auto ptr = reinterpret_cast(obj);
            return verifier.VerifyTable(ptr);
        }
        case DataMeta_DoubleVal: {
            auto ptr = reinterpret_cast(obj);
            return verifier.VerifyTable(ptr);
        }
        case DataMeta_StringVal: {
            auto ptr = reinterpret_cast(obj);
            return verifier.VerifyTable(ptr);
        }
        default:
            // Unrecognized tags are not rejected here.
            return true;
    }
}

/// Verifies parallel vectors of payloads and tags element by element.
/// Returns true when both vectors are null, false when exactly one is null or
/// their sizes differ, and otherwise the conjunction of VerifyDataMeta over
/// each (values[i], types[i]) pair.
inline bool VerifyDataMetaVector(::flatbuffers::Verifier &verifier,
                                 const ::flatbuffers::Vector<::flatbuffers::Offset> *values,
                                 const ::flatbuffers::Vector *types)
{
    if (!values || !types) {
        return !values && !types;
    }
    if (values->size() != types->size()) {
        return false;
    }
    for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
        if (!VerifyDataMeta(verifier, values->Get(i), types->GetEnum(i))) {
            return false;
        }
    }
    return true;
}
/**
 * @brief Handles a single log file, including reading and writing.
 *
 * Holds a file descriptor (fd_, -1 by default) and the file name. The
 * destructor calls close().
 *
 * NOTE(review): copy construction/assignment are implicitly generated, so a
 * copy would share fd_ with the original and both destructors would call
 * close(). If close() releases the descriptor (its body is not visible here),
 * that is a double close — consider deleting the copy operations. Confirm.
 */
class RedoLogFileReader {
public:
    RedoLogFileReader() = default;
    ~RedoLogFileReader() { close(); }

    /// Opens `filename`; returns an RC status code.
    auto open(const char *filename) -> RC;

    /// Closes the reader; also invoked from the destructor. Returns an RC status code.
    auto close() -> RC;

    /// Reads `fileName` and returns a pair whose second element is a size_t.
    /// NOTE(review): the pair's first type is missing in this copy of the patch;
    /// presumably an owning buffer with the size as its length — confirm.
    auto readFromFile(const std::string &fileName) -> std::pair, size_t>;

private:
    int fd_ = -1;            // -1 is the initial value before any open()
    std::string filename_;
};
/// 末尾无 '\0' 结束符 + std::unique_ptr bin_log_; /// 封装的 MYSQL_BIN_LOG 类 +}; + +/** + * @brief 管理所有的 binlog 日志文件, 【封装我的 mgr 类】 + * @details binlog 日志文件都在某个目录下,使用固定的前缀 作为文件名如 + * ON.000001。 每个 binlog 日志文件有最大字节数要求 + */ +class LogFileManager { +public: + LogFileManager(); + ~LogFileManager(); + + /// 接口一: + /** + * @details 每次调用,都会填充当前目录下的所有 binlog 文件到 log_files_ + * 里,如果没有该目录,就创建; 程序中途可能会异常退出,上层会重新调用 + * SetBinlogPath()函数,所以在此处要先判断是否当前目录下已经有 control 文件 + * 如果有,则读取出来进度,找到对应 binlog 文件,把 offset 后的数据都 + * truncate + * @param bashPathBytes 日志文件目录字节数组 + * @param length 字节数组长度 + * @param maxSize 单个 binlog 文件大小上限 + * @param threadNum 转换最大工作线程数 + * @param capacity 目录下所有日志文件可存储最大容量 + * @param expirationTime binlog文件的超时时间 + * @return RC::SUCCESS 表示成功 + */ + auto SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, + int expirationTime) -> RC; + + /// 接口二: + /** + * @param fbStr 待转换的 redo log 字节数组 + * @param length 字节数组长度 + * @param is_ddl true表示是 ddl 语句,false 表示是 ddl 语句 + * @return RC::SUCCESS 表示成功 + */ + auto ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl) -> std::future; + + /// 接口三: + /** + * @brief 获取 binlog 具体转换到哪一个进度点,以落盘 binlog 文件的时间点为主 + * @details 如果日志文件名不符合要求,就返回失败。实际上返回 3 个 + * scn、seq、ckp 字段,只用保存 ckp。读第一行 ckp, 在读的时候,不允许 + * write_thread_ 的 process_writes的写入 + * @return RC::SUCCESS 表示成功 + */ + auto GetLastScnAndSeq(long &scn, long &seq, std::string &ckp) -> RC; + + // TODO 暂时不用实现 + auto ConvertToBinlog(char *jsonStrBytes, int length) -> RC; + + /** + * @brief 获取最新的一个日志文件名 + * @details + * 如果当前有文件就获取最后一个日志文件,否则创建一个日志文件,也就是第一个日志文件 + */ + auto last_file(BinLogFileWriter &file_writer) -> RC; + + auto create_file(BinLogFileWriter &file_writer) -> RC; + + /** + * @brief 获取一个新的日志文件名 + * @details + * 获取下一个日志文件名。通常是上一个日志文件写满了,通过这个接口生成下一个日志文件 + */ + auto next_file(BinLogFileWriter &file_writer) -> RC; + + /** + * @brief 追踪处理进度 [for-test] + */ + void log_progress() + { + LOG_INFO("Pending tasks: %zu, Processed SQL num: %zu, 
    /// Directory holding the log files, as a C string owned by directory_.
    /// The pointer refers to directory_'s internal storage, so it is only
    /// valid while directory_ is left unmodified.
    auto get_directory() -> const char * { return directory_.c_str(); }

    /// File-name prefix used for binlog files.
    auto get_file_prefix() -> const char * { return file_prefix_; }

    /// Maximum number of bytes allowed in a single binlog file.
    auto get_file_max_size() -> size_t { return max_file_size_per_file_; }

    /// Mutable reference to the file-number -> file-name map.
    /// NOTE(review): no lock is taken here although the class also declares
    /// log_file_mutex_; presumably callers must hold it while touching the
    /// map — confirm. The map's template arguments are missing in this copy
    /// of the patch.
    auto get_log_files() -> std::map> &
    {
        return log_files_;
    }

    /// Non-owning pointer to the redo-log reader (may be null if none is set).
    auto get_file_reader() -> RedoLogFileReader * { return file_reader_.get(); }

    /// Non-owning pointer to the shared binlog writer (may be null if none is set).
    auto get_file_writer() -> BinLogFileWriter * { return file_writer_.get(); }

    /// Non-owning pointer to the format transform manager (may be null if none is set).
    auto get_transform_manager() -> LogFormatTransformManager * { return transform_manager_.get(); }

    /// Atomically loads the number of the last file in the directory.
    auto get_last_file_no() -> uint32 { return last_file_no_.load(); }
manager_->global_runtime_status_.store(rc, std::memory_order_relaxed); + continue; + } + for (auto &event : events) { + transaction_group.push_back(transform_to_buffer(event.get())); + } + } else { + const DML *dml = GetDML(task.data_.data()); + checkpoint = dml->check_point()->c_str(); + seq = dml->seq(); + scn = dml->scn(); + + events.reserve(DML_EVENT_NUM); + // 转换但不直接写入文件 + rc = manager_->get_transform_manager()->transformDML(dml, events); + if (LOFT_FAIL(rc)) { + manager_->stop_flag_ = true; + LOG_ERROR("transformDML failed, ckp = %s", checkpoint.c_str()); + // 原子更新全局运行状态 + manager_->global_runtime_status_.store(rc, std::memory_order_relaxed); + continue; + } + for (auto &event : events) { + transaction_group.push_back(transform_to_buffer(event.get())); + } + + } + // 将完整的事务组添加到结果中 + result->transformed_data.push_back(std::move(transaction_group)); + + // 记录每个 batch_size 的最后一个 ckp,写入 control 文件 + result->last_ckp_ = checkpoint; + result->seq_ = seq; + result->scn_ = scn; + } + + // 1. 将结果加入写入队列 + manager_->result_queue_.add_result(std::move(result)); + // 2. 
统计信息 + manager_->processed_tasks_ += tasks_.size(); + + manager_->finished_tasks_.fetch_add(tasks_.size(), + std::memory_order_relaxed); // 记录已完成的任务数 + } + + private: + // 将转换后的数据存入内存 + std::vector transform_to_buffer(AbstractEvent *event) + { + std::vector buffer(LOG_EVENT_HEADER_LEN + event->get_data_size(), 0); + // 将event写入buffer + size_t pos = event->write_to_buffer(buffer.data()); + + return buffer; + } + + private: + LogFileManager *manager_; + std::vector tasks_; + size_t batch_sequence_; // 批次序号,用于确保顺序执行 + }; + + struct CkpInfo { + char log_file_name[512]; // 原 file_name,固定长度 + char index_file_name[512]; // 新增字段,固定长度 + long long position; // 原 log_pos + long long scn; // 新增字段 + long long seq; // 新增字段 + char checkpoint[100]; // 原 ckp,固定长度 + }; + + // 用于存储转换后的数据 + struct BatchResult { + size_t sequence; + std::vector>> transformed_data; // 每个event转换后的数据 + size_t event_write_count_{0}; + std::string last_ckp_; + long long seq_, scn_; + + BatchResult(size_t seq) + : sequence(seq) + {} + }; + + // 管理已转换完成待写入的结果队列 + struct ResultQueue { + std::mutex mutex_; + std::condition_variable cv_; + std::unordered_map> pending_results_; + size_t next_write_sequence_{0}; + std::atomic *stop_flag_; + + void add_result(std::unique_ptr result) + { + std::lock_guard lock(mutex_); + pending_results_[result->sequence] = std::move(result); + // 只有当下一个期望序号的结果到达时才通知 + if (pending_results_.count(next_write_sequence_) > 0) { + cv_.notify_one(); + } + } + + // 专门的文件写入线程 + void process_writes(BinLogFileWriter *writer, LogFileManager *manager) + { + while (!(*stop_flag_)) { + std::vector> results_to_write; + { + std::unique_lock lock(mutex_); + if (cv_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS), + [this] { + return *stop_flag_ || + pending_results_.count(next_write_sequence_) > 0; + })) { + if (*stop_flag_ && pending_results_.empty()) { + break; + } + + // 收集所有连续的可写入批次 + while (pending_results_.count(next_write_sequence_) > 0) { + results_to_write.push_back( + 
std::move(pending_results_[next_write_sequence_])); + pending_results_.erase(next_write_sequence_); + next_write_sequence_++; + } + } + } + + if (!results_to_write.empty()) { + std::lock_guard write_lock(manager->writer_mutex_); + + uint64 resutl_batch_offset = 0; + // 处理所有收集到的批次 + for (auto &result : results_to_write) { + manager->written_tasks_ += result->transformed_data.size(); + + uint64 next_pos = 0; + for (auto &transaction_group : result->transformed_data) { + // 1. 计算事务组总长度 + size_t group_total_size = 0; + for (auto &event_data : transaction_group) { + const uint32 *event_len = reinterpret_cast(event_data.data() + EVENT_LEN_OFFSET); + group_total_size += *event_len; + } + + // 2. 检查剩余空间是否足够写入整个事务组,不够则切换文件 + if (!writer->get_binlog()->remain_bytes_safe(group_total_size)) { + manager->next_file(*writer); + } + + // 3. 写入事务组内的所有event数据 + for (auto &event_data : transaction_group) { + // 写入实际数据, 填充 common_header 中的 log_pos 字段 + uint64 current_pos = writer->get_binlog()->get_bytes_written(); + next_pos = current_pos + event_data.size(); + int4store(event_data.data() + LOG_POS_OFFSET, next_pos); + + writer->get_binlog()->write(event_data.data(), event_data.size()); + result->event_write_count_++; + resutl_batch_offset = next_pos; + + } + } + } + + writer->get_binlog()->flush(); + // 只在最后一个批次更新检查点 + CkpInfo ckp_info; + strncpy(ckp_info.log_file_name, writer->get_clean_filename().c_str(), sizeof(ckp_info.log_file_name) - 1); + strncpy(ckp_info.checkpoint, results_to_write.back()->last_ckp_.c_str(), sizeof(ckp_info.checkpoint) - 1); + ckp_info.position = resutl_batch_offset; + ckp_info.seq = results_to_write.back()->seq_; + ckp_info.scn = results_to_write.back()->scn_; + strncpy(ckp_info.index_file_name, default_binlog_index_file_name_, sizeof(ckp_info.index_file_name) - 1); + manager->actual_write_ckp2control(ckp_info); + } + } + } + }; + + /** + * @brief 等待 BatchQueue 和 ResultQueue 的任务都完成 + */ + void wait_for_completion(); + + /** + * @brief 保证所有任务执行完后安全释放资源 + */ 
    /**
     * @brief Task-collection thread body.
     */
    void process_tasks();

    /**
     * @brief Body of a dedicated background thread that cleans up binlog files
     *        once they reach the configured binlog TTL.
     * @details Prunes log_files_ to keep it from growing without bound and
     *          removes the files.
     */
    void clean_logs();

    /**
     * @brief Writes a CkpInfo to the control file; done once per processed batch.
     */
    auto actual_write_ckp2control(const CkpInfo &ckp_info) -> RC;

    /**
     * @brief When API 1 (SetBinlogPath) is called, the progress stored in the
     *        control file is queried first and handled in one place.
     * @param control_path    path of the control file to read
     * @param ckp             out-parameter (non-const reference)
     * @param target_filename out-parameter (non-const reference)
     * @param offset          out-parameter (non-const reference)
     * NOTE(review): the exact meaning of each out-parameter is inferred from
     * its name only; the definition is not visible here — confirm.
     */
    auto read_from_ckp_file_content(const std::filesystem::path &control_path,
                                    std::string &ckp,
                                    std::string &target_filename,
                                    uint64 &offset) ->RC;
生产者——投放任务 + std::shared_ptr> ring_buffer_; + static constexpr size_t RING_BUFFER_CAPACITY = 10000; + std::condition_variable task_cond_; // event_trigger 通知 + std::mutex task_mutex_; + std::thread task_collector_thread_; // 用于运行process_tasks的线程 + static constexpr size_t BATCH_SIZE = 4096; // 批量处理的大小 + + std::atomic finished_tasks_{0}; // 记录已完成的任务数 + std::atomic total_solve_tasks_{0}; // 记录任务总数 + + std::mutex pending_tasks_mutex_; + std::atomic pending_tasks_{0}; // 跟踪待处理任务数量 + std::chrono::time_point last_task_thread_notify_time = + std::chrono::steady_clock::now(); + + std::atomic stop_flag_{false}; // 用于控制线程停止 + + // 2. 消费者——转换计算 + std::unique_ptr transform_manager_; + std::unique_ptr thread_pool_; + int transform_max_thread_num_; + + std::atomic batch_sequence_{0}; // 顺序收集 tasks 的批次序号 + // global system runtime error code + std::atomic global_runtime_status_{RC::SUCCESS}; + + // 3. 共享的文件写入器 + std::unique_ptr file_writer_; + std::mutex writer_mutex_; // 保护文件写入 + ResultQueue result_queue_; + std::thread writer_thread_; // 专门的写入线程 + + // debug info 追踪进度 + std::atomic processed_tasks_{0}; + std::atomic written_tasks_{0}; + + std::chrono::time_point start_time_; +}; + +// 全局变量 +extern std::unique_ptr g_log_file_manager; + +#ifdef __cplusplus +extern "C" { +#endif + +RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, + int expirationTime); + +RC GetLastScnAndSeq(long *scn, long *seq, char *ckp); + +RC ConvertToBinlog(char *jsonStrBytes, int length); + +// C 语言接口声明,返回 future 句柄(可能是指针或简单的 ID) +RC ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl); + +#ifdef __cplusplus +} +#endif diff --git a/binlogconvert/include/sql/field_common_properties.h b/binlogconvert/include/sql/field_common_properties.h new file mode 100644 index 0000000000000000000000000000000000000000..b266e8a8c38638e245a1379635305f278bac1fdd --- /dev/null +++ b/binlogconvert/include/sql/field_common_properties.h @@ -0,0 +1,37 @@ +#pragma once + 
+#include "field_types.h" + +// refer from: mysql field_common_properties.h + +static constexpr int DECIMAL_MAX_SCALE{30}; +static constexpr int DECIMAL_NOT_SPECIFIED{DECIMAL_MAX_SCALE + 1}; + +/** YYYY-MM-DD */ +constexpr const int MAX_DATE_WIDTH{10}; +/** -838:59:59 */ +constexpr const int MAX_TIME_WIDTH{10}; +/** -DDDDDD HH:MM:SS.###### */ +constexpr const int MAX_TIME_FULL_WIDTH{23}; +/** YYYY-MM-DD HH:MM:SS.###### AM */ +constexpr const int MAX_DATETIME_FULL_WIDTH{29}; +/** YYYY-MM-DD HH:MM:SS */ +constexpr const int MAX_DATETIME_WIDTH{19}; + +/** maximum length of buffer in our big digits (uint32). */ +static constexpr int DECIMAL_BUFF_LENGTH{9}; +/** the number of digits that my_decimal can possibly contain */ +static constexpr int DECIMAL_MAX_POSSIBLE_PRECISION{DECIMAL_BUFF_LENGTH * 9}; + +constexpr const int DATETIME_MAX_DECIMALS = 6; +static constexpr int DECIMAL_MAX_PRECISION{DECIMAL_MAX_POSSIBLE_PRECISION - 8 * 2}; +#define portable_sizeof_char_ptr 8 /**< blob 类型字段的指针大小 */ + +#define NOT_NULL_FLAG 1 /**< Field can't be NULL */ +#define BLOB_FLAG 16 /**< Field is a blob */ +#define UNSIGNED_FLAG 32 /**< Field is unsigned */ +#define ZEROFILL_FLAG 64 /**< Field is zerofill */ +#define BINARY_FLAG 128 /**< Field is binary */ +#define ENUM_FLAG 256 /**< field is an enum */ +#define TIMESTAMP_FLAG 1024 /**< Field is a timestamp */ +#define SET_FLAG 2048 /**< field is a set */ diff --git a/binlogconvert/include/sql/field_types.h b/binlogconvert/include/sql/field_types.h new file mode 100644 index 0000000000000000000000000000000000000000..f3d9692804df3728b05feafaa25a6048a43082b0 --- /dev/null +++ b/binlogconvert/include/sql/field_types.h @@ -0,0 +1,85 @@ +// refer from: mysql/include/field_types.h +#pragma once + +#include +#include +/** + Column types for MySQL +*/ +enum enum_field_types { + MYSQL_TYPE_DECIMAL, + MYSQL_TYPE_TINY, + MYSQL_TYPE_SHORT, + MYSQL_TYPE_LONG, + MYSQL_TYPE_FLOAT, + MYSQL_TYPE_DOUBLE, + MYSQL_TYPE_NULL, + MYSQL_TYPE_TIMESTAMP, + 
MYSQL_TYPE_LONGLONG, + MYSQL_TYPE_INT24, + MYSQL_TYPE_DATE, // 10 + MYSQL_TYPE_TIME, // 11 + MYSQL_TYPE_DATETIME, // 12 + MYSQL_TYPE_YEAR, // 13 + MYSQL_TYPE_NEWDATE, /**< Internal to MySQL. Not used in protocol */ + MYSQL_TYPE_VARCHAR, + MYSQL_TYPE_BIT, + MYSQL_TYPE_TIMESTAMP2, // 17 + MYSQL_TYPE_DATETIME2, /**< Internal to MySQL. Not used in protocol */ + MYSQL_TYPE_TIME2, /**< Internal to MySQL. Not used in protocol */ + MYSQL_TYPE_TYPED_ARRAY, /**< Used for replication only */ + MYSQL_TYPE_INVALID = 243, + MYSQL_TYPE_BOOL = 244, /**< Currently just a placeholder */ + MYSQL_TYPE_JSON = 245, + MYSQL_TYPE_NEWDECIMAL = 246, + MYSQL_TYPE_ENUM = 247, + MYSQL_TYPE_SET = 248, + MYSQL_TYPE_TINY_BLOB = 249, + MYSQL_TYPE_MEDIUM_BLOB = 250, + MYSQL_TYPE_LONG_BLOB = 251, + MYSQL_TYPE_BLOB = 252, + MYSQL_TYPE_VAR_STRING = 253, + MYSQL_TYPE_STRING = 254, + MYSQL_TYPE_GEOMETRY = 255 +}; + +// 定义映射关系 +inline const std::unordered_map type_map = { + {"TINYINT", MYSQL_TYPE_TINY}, + {"SMALLINT", MYSQL_TYPE_SHORT}, + {"SHORT", MYSQL_TYPE_SHORT}, + {"MEDIUMINT", MYSQL_TYPE_INT24}, + {"INT", MYSQL_TYPE_LONG}, + {"BIGINT", MYSQL_TYPE_LONGLONG}, + {"FLOAT", MYSQL_TYPE_FLOAT}, + {"DOUBLE", MYSQL_TYPE_DOUBLE}, + {"DECIMAL", MYSQL_TYPE_NEWDECIMAL}, + {"NULL", MYSQL_TYPE_NULL}, + {"CHAR", MYSQL_TYPE_STRING}, + {"VARCHAR", MYSQL_TYPE_VARCHAR}, + {"TINYTEXT", MYSQL_TYPE_TINY_BLOB}, + {"TEXT", MYSQL_TYPE_BLOB}, + {"MEDIUMTEXT", MYSQL_TYPE_MEDIUM_BLOB}, + {"LONGTEXT", MYSQL_TYPE_LONG_BLOB}, + {"TINYBLOB", MYSQL_TYPE_TINY_BLOB}, + {"BLOB", MYSQL_TYPE_BLOB}, + {"MEDIUMBLOB", MYSQL_TYPE_MEDIUM_BLOB}, + {"LONGBLOB", MYSQL_TYPE_LONG_BLOB}, + {"TIMESTAMP", MYSQL_TYPE_TIMESTAMP2}, + {"DATE", MYSQL_TYPE_DATE}, + {"TIME", MYSQL_TYPE_TIME}, + {"DATETIME", MYSQL_TYPE_DATETIME}, + {"YEAR", MYSQL_TYPE_YEAR}, + {"BIT", MYSQL_TYPE_BIT}, + {"ENUM", MYSQL_TYPE_ENUM}, + {"SET", MYSQL_TYPE_SET}, + {"JSON", MYSQL_TYPE_JSON}}; + +inline const std::unordered_map charset_multiplier = { + {"armscii8", 1}, {"ascii", 
1}, {"big5", 2}, {"binary", 1}, {"cp1250", 1}, {"cp1251", 1}, + {"cp1256", 1}, {"cp1257", 1}, {"cp850", 1}, {"cp852", 1}, {"cp866", 1}, {"cp932", 2}, + {"dec8", 1}, {"eucjpms", 3}, {"euckr", 2}, {"gb18030", 4}, {"gb2312", 2}, {"gbk", 2}, + {"geostd8", 1}, {"greek", 1}, {"hebrew", 1}, {"hp8", 1}, {"keybcs2", 1}, {"koi8r", 1}, + {"koi8u", 1}, {"latin1", 1}, {"latin2", 1}, {"latin5", 1}, {"latin7", 1}, {"macce", 1}, + {"macroman", 1}, {"sjis", 2}, {"swe7", 1}, {"tis620", 1}, {"ucs2", 2}, {"ujis", 3}, + {"utf16", 4}, {"utf16le", 4}, {"utf32", 4}, {"utf8mb3", 3}, {"utf8", 3}, {"utf8mb4", 4}}; diff --git a/binlogconvert/include/sql/mysql_fields.h b/binlogconvert/include/sql/mysql_fields.h new file mode 100644 index 0000000000000000000000000000000000000000..f34c95e5ddf26e58679c25019e1a194311f553b2 --- /dev/null +++ b/binlogconvert/include/sql/mysql_fields.h @@ -0,0 +1,716 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/logging.h" +#include "common/macros.h" +#include "common/type_def.h" +#include "sql/field_common_properties.h" +#include "sql/field_types.h" // enum_field_types + +namespace mysql { + +class Field; +using FieldRef = std::shared_ptr; + +#define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 
1 : 2) + +// Max width for a VARCHAR column, in number of bytes +constexpr size_t MAX_VARCHAR_WIDTH = 65535; + +constexpr size_t TINY_BLOB_PACKLENGTH = 1; +constexpr size_t SHORT_BLOB_PACKLENGTH = 2; +constexpr size_t MEDIUM_BLOB_PACKLENGTH = 3; +constexpr size_t LONG_BLOB_PACKLENGTH = 4; + +// Maximum sizes of the four BLOB types, in number of bytes +constexpr size_t MAX_TINY_BLOB_WIDTH = 255; +constexpr size_t MAX_SHORT_BLOB_WIDTH = 65535; +constexpr size_t MAX_MEDIUM_BLOB_WIDTH = 16777215; +constexpr size_t MAX_LONG_BLOB_WIDTH = 4294967295; + +constexpr int MAX_ENUM_ELEMENTS_PER_BYTE = 256; +constexpr int BITS_PER_BYTE = 8; + +constexpr int THRESHOLD_SET_MAX_LENGTH = 4; +constexpr int MAX_SET_PACK_LENGTH = 8; + +static unsigned int my_time_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) { + LOG_ERROR("time dec is too large"); + return -1; /* NOTE(review): the return type is unsigned int, so this -1 reaches the caller as UINT_MAX; the two sibling helpers below do the same - confirm callers treat that value as an error */ + } + return 3 + (dec + 1) / 2; +} + +static unsigned int my_datetime_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) { + LOG_ERROR("datetime dec is too large"); + return -1; + } + return 5 + (dec + 1) / 2; +} + +static unsigned int my_timestamp_binary_length(unsigned int dec) +{ + if (dec > DATETIME_MAX_DECIMALS) { + LOG_ERROR("timestamp dec is too large"); + return -1; + } + return 4 + (dec + 1) / 2; +} + +class Field { +public: + Field(uint32 length_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg); + virtual ~Field() = default; + DISALLOW_COPY(Field); + + bool is_flag_set(unsigned flag) const { return flags & flag; } + + void set_flag(unsigned flag) { flags |= flag; } + + void clear_flag(unsigned flag) { flags &= ~flag; } + + /* + Number of bytes this field occupies in a table row, in memory + */ + virtual uint32 pack_length() const { return (uint32)field_length; } + + // float/double/str + uint32 get_width() const { return field_length; } + + /* + Number of bytes this field occupies in a table row, on disk + e.g. it can differ because of compression or the storage engine + */ + virtual uint32 pack_length_in_rec() const { return pack_length(); } + 
+ virtual uint32 pack_length_from_metadata(uint32 field_metadata) const { return field_metadata; } + + virtual uint32 row_pack_length() const { return 0; } + + int save_field_metadata(unsigned char *first_byte) + { + return do_save_field_metadata(first_byte); + } + + virtual uint32 data_length(ptrdiff_t row_offset [[maybe_unused]] = 0) const + { + return pack_length(); + } + + virtual enum_field_types type() const = 0; + + virtual enum_field_types real_type() const { return type(); } + + virtual enum_field_types binlog_type() const { return type(); } + + bool is_nullable() const { return m_null; } + + virtual bool is_unsigned() const { return false; } + + virtual uint32 decimals() const { return 0; } + + /** + @returns Field index. + */ + uint16 field_index() const { return m_field_index; } + + virtual int do_save_field_metadata(unsigned char *metadata_ptr) const { return 0; } + +public: + const char *field_name; + bool m_null = false; + unsigned char null_bit; // Bit used to test null bit + uint32 field_length; + +private: + uint32 flags{0}; + uint16 m_field_index{0}; // field number in fields array; defaulted to 0 so field_index() never reads an indeterminate value when the constructor does not set it +}; + +/****************************************************************************** + integer type +******************************************************************************/ + +class Field_num : public Field { +public: + Field_num(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg); + + bool is_unsigned() const final { return unsigned_flag; } + + uint32 decimals() const final { return (uint32)dec; } + + uint32 row_pack_length() const final { return pack_length(); } + + uint32 pack_length_from_metadata(uint32) const override { return pack_length(); } + +public: + const uint8 dec; + +private: + /** + - true - unsigned + - false - signed + */ + const bool unsigned_flag; +}; + +/* New decimal/numeric field which use fixed point arithmetic */ +class Field_new_decimal : public Field_num { 
+public: + // 构造函数 + Field_new_decimal(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg); + + // 获取类型 + enum_field_types type() const final { return MYSQL_TYPE_NEWDECIMAL; } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +public: + /* The maximum number of decimal digits can be stored */ + uint32 precision; + +private: + bool m_keep_precision{false}; +}; + +class Field_tiny : public Field_num { +public: + Field_tiny(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + {} + + enum_field_types type() const override { return MYSQL_TYPE_TINY; } + + uint32 pack_length() const final { return 1; } +}; + +class Field_short final : public Field_num { +public: + Field_short(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_SHORT; } + + uint32 pack_length() const final { return 2; } +}; + +class Field_medium final : public Field_num { +public: + Field_medium(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_INT24; } + + uint32 pack_length() const final { return 3; } +}; + +class Field_long : public Field_num { +public: + static const int PACK_LENGTH = 4; + + Field_long(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + {} + + enum_field_types 
type() const final { return MYSQL_TYPE_LONG; } + + uint32 pack_length() const final { return PACK_LENGTH; } +}; + +class Field_longlong : public Field_num { +public: + static const int PACK_LENGTH = 8; + + Field_longlong(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 0, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_LONGLONG; } + + uint32 pack_length() const final { return PACK_LENGTH; } +}; + +/****************************************************************************** + float/double/decimal type +***********************************************2*******************************/ + +/* base class for float and double and decimal (old one) */ +class Field_real : public Field_num { +public: + Field_real(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + , not_fixed(dec_arg >= DECIMAL_NOT_SPECIFIED) + {} + +public: + bool not_fixed; // 固定精度 +}; + +class Field_decimal final : public Field_real { +public: + Field_decimal(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg) + : Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_DECIMAL; } +}; + +class Field_float final : public Field_real { +public: + Field_float(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg) + : Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_FLOAT; } + + uint32 pack_length() const final { return 
sizeof(float); } + + int do_save_field_metadata(unsigned char *first_byte) const final; +}; + +class Field_double final : public Field_real { +public: // 不考虑精度 + Field_double(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8 dec_arg, bool unsigned_arg) + : Field_real(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, dec_arg, unsigned_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_DOUBLE; } + + uint32 pack_length() const final { return sizeof(double); } + + int do_save_field_metadata(unsigned char *first_byte) const final; +}; + +/****************************************************************************** + temporal type +******************************************************************************/ + +/* + Abstract class for DATE, TIME, DATETIME, TIMESTAMP + with and without fractional part. +*/ +class Field_temporal : public Field { +public: + Field_temporal(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint32 len_arg, uint32 dec_arg) + : Field(len_arg + ((normalize_dec(dec_arg)) ? normalize_dec(dec_arg) + 1 : 0), + is_nullable_arg, null_bit_arg, field_name_arg) + { + set_flag(BINARY_FLAG); + dec = normalize_dec(dec_arg); + } + + int do_save_field_metadata(unsigned char *metadata_ptr) const override + { + *metadata_ptr = decimals(); + return 1; + } + +protected: + uint32 dec; // Number of fractional digits + + static uint32 normalize_dec(uint32 dec_arg) + { + return dec_arg == DECIMAL_NOT_SPECIFIED ? DATETIME_MAX_DECIMALS : dec_arg; + } +}; + +/** + Abstract class for types with date + with optional time, with or without fractional part: + DATE, DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). 
+*/ +class Field_temporal_with_date : public Field_temporal { +public: + Field_temporal_with_date(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, + uint32 int_length_arg, uint32 dec_arg) + : Field_temporal(is_nullable_arg, null_bit_arg, field_name_arg, int_length_arg, dec_arg) + {} +}; + +/** + Abstract class for types with date and time, + with or without fractional part: + DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). +*/ +class Field_temporal_with_date_and_time : public Field_temporal_with_date { +public: + Field_temporal_with_date_and_time(bool is_nullable_arg, uchar null_bit_arg, + const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date(is_nullable_arg, null_bit_arg, field_name_arg, + MAX_DATETIME_WIDTH, dec_arg) + {} +}; + +/** + Abstract class for types with date and time, with fractional part: + DATETIME, DATETIME(N), TIMESTAMP, TIMESTAMP(N). +*/ +class Field_temporal_with_date_and_timef : public Field_temporal_with_date_and_time { +public: + Field_temporal_with_date_and_timef(bool is_nullable_arg, uchar null_bit_arg, + const char *field_name_arg, uint32 dec_arg) + : Field_temporal_with_date_and_time(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) + {} + + uint32 decimals() const final { return dec; } +}; + +/* + Field implementing TIMESTAMP(N) data type, where N=0..6. 
+*/ +class Field_timestampf : public Field_temporal_with_date_and_timef { +public: + Field_timestampf(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, + uint32 dec_arg) + : Field_temporal_with_date_and_timef(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + enum_field_types real_type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_TIMESTAMP2; } + + uint32 pack_length() const final { return my_timestamp_binary_length(dec); } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + uint32 tmp = my_timestamp_binary_length(field_metadata); + return tmp; + } +}; + +class Field_time_common : public Field_temporal { +public: + Field_time_common(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg, + uint32 dec_arg) + : Field_temporal(is_nullable_arg, null_bit_arg, field_name_arg, MAX_TIME_WIDTH, dec_arg) + {} +}; + +/* + Field implementing TIME data type without fractional seconds. + It will be removed eventually. +*/ +class Field_time final : public Field_time_common { +public: + Field_time(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg) + : Field_time_common(is_nullable_arg, null_bit_arg, field_name_arg, 0) + {} + + enum_field_types type() const final { return MYSQL_TYPE_TIME; } +}; + +/* + Field implementing TIME(N) data type, where N=0..6. 
+*/ +class Field_timef final : public Field_time_common { +public: + Field_timef(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, + uint32 dec_arg) + : Field_time_common(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) + {} + + uint32 decimals() const final { return dec; } + + enum_field_types type() const final { return MYSQL_TYPE_TIME; } + + enum_field_types real_type() const final { return MYSQL_TYPE_TIME2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_TIME2; } + + uint32 pack_length() const final { return my_time_binary_length(dec); } +}; + +class Field_newdate : public Field_temporal_with_date { +public: + static const int PACK_LENGTH = 3; + + Field_newdate(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg) + : Field_temporal_with_date(is_nullable_arg, null_bit_arg, field_name_arg, MAX_DATE_WIDTH, 0) + {} + + enum_field_types type() const final { return MYSQL_TYPE_DATE; } + enum_field_types real_type() const final { return MYSQL_TYPE_NEWDATE; } + + uint32 pack_length() const final { return PACK_LENGTH; } +}; + +class Field_year final : public Field_tiny { +public: + Field_year(bool is_nullable_arg, unsigned char null_bit_arg, const char *field_name_arg) + : Field_tiny(YEAR_FIELD_LENGTH, is_nullable_arg, null_bit_arg, field_name_arg, true) + {} + + enum_field_types type() const final { return MYSQL_TYPE_YEAR; } + +private: + static constexpr uint32 YEAR_FIELD_LENGTH = 4; +}; + +/* + Field implementing DATETIME(N) data type, where N=0..6. 
+*/ +class Field_datetimef : public Field_temporal_with_date_and_timef { +public: + Field_datetimef(bool is_nullable_arg, uchar null_bit_arg, const char *field_name_arg, + uint32 dec_arg) + : Field_temporal_with_date_and_timef(is_nullable_arg, null_bit_arg, field_name_arg, dec_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_DATETIME; } + + enum_field_types real_type() const final { return MYSQL_TYPE_DATETIME2; } + + enum_field_types binlog_type() const final { return MYSQL_TYPE_DATETIME2; } + + uint32 pack_length() const final { return my_datetime_binary_length(dec); } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + uint32 tmp = my_datetime_binary_length(field_metadata); + return tmp; + } + + int do_save_field_metadata(uchar *metadata_ptr) const override /* writes decimals() as the single metadata byte and returns 1; const + override because the virtual it replaces is const - the former non-const signature only hid it and was never reached through save_field_metadata() */ + { + LOG_DEBUG("===================datetime 's meta data size %d", decimals()); + *metadata_ptr = decimals(); + return 1; + } +}; + +/****************************************************************************** + string type +******************************************************************************/ + +class Field_str : public Field { +public: + Field_str(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg); + + uint32 decimals() const override { return DECIMAL_NOT_SPECIFIED; } +}; + +class Field_longstr : public Field_str { +public: + Field_longstr(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg) + : Field_str(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + {} +}; + +// char +class Field_string : public Field_longstr { +public: + Field_string(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg) + : Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + {} + + enum_field_types type() const final { return MYSQL_TYPE_STRING; } + enum_field_types real_type() const final { return MYSQL_TYPE_STRING; } + + uint32 
row_pack_length() const final { return field_length; } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + if (field_metadata == 0) { + return row_pack_length(); + } + return (((field_metadata >> 4) & MASK_HIGH_BITS) ^ MASK_HIGH_BITS) + + (field_metadata & MASK_LOW_BITS); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +private: + static constexpr uint32 MASK_HIGH_BITS = 0x300; + static constexpr uint32 MASK_LOW_BITS = 0x00ff; +}; + +// varchar +class Field_varstring : public Field_longstr { +public: + Field_varstring(uint32 len_arg, uint32 length_bytes_arg, bool is_nullable_arg, + unsigned char null_bit_arg, const char *field_name_arg); + + enum_field_types type() const final { return MYSQL_TYPE_VARCHAR; } + enum_field_types real_type() const final { return MYSQL_TYPE_VARCHAR; } + + uint32 pack_length() const final { return (uint32)field_length + length_bytes; } + + uint32 row_pack_length() const final { return field_length; } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +private: + /* Store number of bytes used to store length (1 or 2) */ + uint32 length_bytes; +}; + +class Field_blob : public Field_longstr { +public: + Field_blob(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, bool set_packlength) + : Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + , packlength(LONG_BLOB_PACKLENGTH) + { + set_flag(BLOB_FLAG); + if (set_packlength) { + packlength = len_arg <= MAX_TINY_BLOB_WIDTH ? TINY_BLOB_PACKLENGTH + : len_arg <= MAX_SHORT_BLOB_WIDTH ? SHORT_BLOB_PACKLENGTH + : len_arg <= MAX_MEDIUM_BLOB_WIDTH ? 
MEDIUM_BLOB_PACKLENGTH + : LONG_BLOB_PACKLENGTH; + } + } + + enum_field_types type() const override { return MYSQL_TYPE_BLOB; } + + uint32 pack_length() const final + { + return packlength; // already computed; can only be 1, 2, 3 or 4 + } + + uint32 pack_length_no_ptr() const { return (uint32)(packlength); } + + uint32 row_pack_length() const final { return pack_length_no_ptr(); } + + int do_save_field_metadata(unsigned char *first_byte) const override; + +protected: + /** + The number of bytes used to represent the length of the blob. + */ + uint32 packlength; +}; + +class Field_json : public Field_blob { +public: + Field_json(uint32 len_arg, bool is_nullable_arg, uint32 null_bit_arg, + const char *field_name_arg, uint32 blob_pack_length) + : Field_blob(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, blob_pack_length) /* NOTE(review): Field_blob's last constructor parameter is `bool set_packlength`, so blob_pack_length is only used as zero/non-zero and the stored packlength is derived from len_arg; null_bit_arg is likewise narrowed from uint32 to unsigned char - confirm both are intended */ + {} + + enum_field_types type() const override { return MYSQL_TYPE_JSON; } + + int do_save_field_metadata(unsigned char *first_byte) const final; + // no pack_length +}; + +class Field_enum : public Field_str { +public: + Field_enum(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint32 packlength_arg) + : Field_str(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + , packlength(packlength_arg) + { + set_flag(ENUM_FLAG); + } + + enum_field_types type() const final { return real_type(); } + + uint32 pack_length() const final { return (uint32)packlength; } + + enum_field_types real_type() const override { return MYSQL_TYPE_ENUM; } + + uint32 pack_length_from_metadata(uint32 field_metadata) const final + { + return (field_metadata & 0x00ff); + } + + int do_save_field_metadata(unsigned char *first_byte) const final; + +protected: + uint32 packlength; +}; + +class Field_set final : public Field_enum { +public: + Field_set(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint32 packlength_arg) + : Field_enum(len_arg, is_nullable_arg, null_bit_arg, field_name_arg, 
packlength_arg) + { + clear_flag(ENUM_FLAG); + set_flag(SET_FLAG); + empty_set_string = {"", 0}; + } + + enum_field_types real_type() const final { return MYSQL_TYPE_SET; } + +private: + MYSQL_LEX_CSTRING empty_set_string; +}; + +class Field_bit : public Field { +public: + unsigned char bit_ofs; // offset to 'uneven' high bits + uint32 bit_len; // number of 'uneven' high bits + uint32 bytes_in_rec; + Field_bit(uint32 len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + unsigned char bit_ofs_arg, const char *field_name_arg); + + enum_field_types type() const final { return MYSQL_TYPE_BIT; } + + uint32 pack_length() const final + { + return (uint32)(field_length + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE; + } + + int do_save_field_metadata(unsigned char *first_byte) const final; +}; + +/// 构建 Field 的元数据的 除了 charset 的 5 个字段 +auto make_field(const char *field_name, size_t field_length, bool is_unsigned, bool is_nullable, + size_t null_bit, enum_field_types field_type, int interval_count, + uint32 decimals) -> FieldRef; + +enum_field_types get_blob_type_from_length(size_t length); +size_t calc_pack_length(enum_field_types type, size_t length); + +unsigned int my_time_binary_length(unsigned int dec); +unsigned int my_datetime_binary_length(unsigned int dec); +unsigned int my_timestamp_binary_length(unsigned int dec); + +inline uint32 get_enum_pack_length(int elements) +{ + return elements < MAX_ENUM_ELEMENTS_PER_BYTE ? 1 : 2; +} + +inline uint32 get_set_pack_length(int elements) +{ + uint32 len = (elements + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE; + return len > THRESHOLD_SET_MAX_LENGTH ? 
MAX_SET_PACK_LENGTH : len; +} + +} // namespace mysql diff --git a/binlogconvert/include/transform_manager.h b/binlogconvert/include/transform_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..784be1aba1ac4d88b1ca389785471e6396a79e27 --- /dev/null +++ b/binlogconvert/include/transform_manager.h @@ -0,0 +1,41 @@ +#pragma once + +#include "format/ddl_generated.h" +#include "format/dml_generated.h" + +#include "binlog.h" +#include "data_handler.h" +#include "events/write_event.h" +#include "utils/table_id.h" + +using namespace loft; + +class LogFormatTransformManager { +public: + LogFormatTransformManager() + : dataHandlerFactory_(std::make_unique()) + {} + ~LogFormatTransformManager() = default; + + // 组装 2 个 event + RC transformDDL(const DDL *ddl, std::vector> &events); + // 组装 5 个 event + RC transformDML(const DML *dml, std::vector> &events); + +private: + inline uint64_t stringToTimestamp(const std::string &timeString); + + inline enum_field_types ConvertStringType(std::string_view type_str); + + RC processRowData(const ::flatbuffers::Vector<::flatbuffers::Offset> &fields, + Rows_event *row, const std::unordered_map &field_map, + const std::vector &field_vec, bool is_before); + + FieldDataHandler *getHandler(loft::DataMeta type) const + { + return dataHandlerFactory_->getHandler(type); + } + +private: + std::unique_ptr dataHandlerFactory_; +}; diff --git a/binlogconvert/include/utils/base64.h b/binlogconvert/include/utils/base64.h new file mode 100644 index 0000000000000000000000000000000000000000..f1c6f079e9a07014992073991cbfcfc07c26ca70 --- /dev/null +++ b/binlogconvert/include/utils/base64.h @@ -0,0 +1,426 @@ +/* Copyright (c) 2003, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. 
+ + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: include/base64.h +#pragma once + +#include // ceil() + +/* Allow multuple chunks 'AAA= AA== AA==', binlog uses this */ +#define MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS 1 + +static char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +/* + Base64 decoder stream +*/ +typedef struct my_base64_decoder_t { + const char *src; /* Pointer to the current input position */ + const char *end; /* Pointer to the end of input buffer */ + uint c; /* Collect bits into this number */ + int error; /* Error code */ + unsigned char state; /* Character number in the current group of 4 */ + unsigned char mark; /* Number of padding marks in the current group */ +} MY_BASE64_DECODER; + +/* + Helper table for decoder. + -2 means "space character" + -1 means "bad character" + Non-negative values mean valid base64 encoding character. 
+*/ +static int8_t from_base64_table[] = { + /* 00 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -2, -2, -2, -2, -2, -1, -1, + /* 10 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* 20 */ -2, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 62, -1, -1, -1, 63, /* !"#$%&'()*+,-./ */ + /* 30 */ 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, -1, -1, -1, -1, -1, -1, /* 0123456789:;<=>? */ + /* 40 */ -1, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, /* @ABCDEFGHIJKLMNO */ + /* 50 */ 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, -1, -1, -1, -1, -1, /* PQRSTUVWXYZ[\]^_ */ + /* 60 */ -1, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, /* `abcdefghijklmno */ + /* 70 */ 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, -1, -1, -1, -1, -1, /* pqrstuvwxyz{|}~ */ + /* 80 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* 90 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* A0 */ -2, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* B0 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* C0 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* D0 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* E0 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + /* F0 */ -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1}; + +/** + * Skip leading spaces in a base64 encoded stream + * and stop on the first non-space character. + * decoder->src will point to the first non-space character, + * or to the end of the input string. + * In case when end-of-input met on unexpected position, + * decoder->error is also set to 1. 
+ * + * @param decoder Pointer to MY_BASE64_DECODER + * + * @return + * false on success (there are some more non-space input characters) + * true on error (end-of-input found) + */ +static inline bool my_base64_decoder_skip_spaces(MY_BASE64_DECODER *decoder) +{ + for (; decoder->src < decoder->end; decoder->src++) { + if (from_base64_table[(unsigned char)*decoder->src] != -2) { + return false; + } + } + if (decoder->state > 0) { + decoder->error = 1; /* Unexpected end-of-input found */ + } + return true; +} + +/** + * Convert the next character in a base64 encoded stream + * to a number in the range [0..63] + * and mix it with the previously collected value in decoder->c. + * + * @param decoder base64 decoding stream + * + * @return + * false on success + * true on error (invalid base64 character found) + */ +static inline bool my_base64_add(MY_BASE64_DECODER *decoder) +{ + int res; + decoder->c <<= 6; + if ((res = from_base64_table[(unsigned char)*decoder->src++]) < 0) { + return (decoder->error = true); + } + decoder->c += (uint)res; + return false; +} + +/** + * Get the next character from a base64 encoded stream. + * Skip spaces, then scan the next base64 character or a pad character + * and collect bits into decoder->c. 
+ * + * @param decoder Pointer to MY_BASE64_DECODER + * @return + * false on success (a valid base64 encoding character found) + * true on error (unexpected character or unexpected end-of-input found) + */ +static inline bool my_base64_decoder_getch(MY_BASE64_DECODER *decoder) +{ + if (my_base64_decoder_skip_spaces(decoder)) { + return true; /* End-of-input */ + } + + if (!my_base64_add(decoder)) /* Valid base64 character found */ + { + if (decoder->mark) { + /* If we have scanned '=' already, then only '=' is valid */ + // in base64 decoder condition, the decoder->state must be 3 + if (decoder->state == 3) { + decoder->error = 1; + decoder->src--; + return true; /* expected '=', but encoding character found */ + } + } + decoder->state++; + return false; + } + + /* Process error */ + switch (decoder->state) { + case 0: + case 1: + decoder->src--; + return true; /* base64 character expected */ + + case 2: + case 3: + if (decoder->src[-1] == '=') { + decoder->error = 0; /* Not an error - it's a pad character */ + decoder->mark++; + } else { + decoder->src--; + return true; /* base64 character or '=' expected */ + } + break; + + default: + return true; /* Wrong state, should not happen */ + } + + decoder->state++; + return false; +} + +/* + Calculate how much memory needed for dst of base64_encode() +*/ +static inline u_int64_t base64_needed_encoded_length(u_int64_t length_of_data) +{ + u_int64_t nb_base64_chars; + if (length_of_data == 0) { + return 1; + } + nb_base64_chars = (length_of_data + 2) / 3 * 4; + + return nb_base64_chars + /* base64 char incl padding */ + (nb_base64_chars - 1) / 76 + /* newlines */ + 1; /* NUL termination of string */ +} + +/* + Maximum length base64_encode_needed_length() can accept with no overflow. 
+*/ +static inline u_int64_t base64_encode_max_arg_length() +{ +#if (SIZEOF_VOIDP == 8) + /* + 6827690988321067803 -> 9223372036854775805 + 6827690988321067804 -> -9223372036854775807 + */ + return 0x5EC0D4C77B03531BLL; +#else + /* + 1589695686 -> 2147483646 + 1589695687 -> -2147483645 + */ + return 0x5EC0D4C6; +#endif +} + +/* + Calculate how much memory needed for dst of base64_decode(). The integer expression n * 3 / 4 truncates before ceil() sees it, so the result is floor(3n/4). +*/ +static inline u_int64_t base64_needed_decoded_length(u_int64_t length_of_encoded_data) +{ + return static_cast<u_int64_t>(ceil(static_cast<double>(length_of_encoded_data * 3 / 4))); +} + +/* + Maximum length base64_decode_needed_length() can accept with no overflow. NOTE(review): if SIZEOF_VOIDP is not defined the preprocessor evaluates it as 0 and the 32-bit limit is returned - confirm the build defines it. +*/ +static inline u_int64_t base64_decode_max_arg_length() +{ +#if (SIZEOF_VOIDP == 8) + return 0x2AAAAAAAAAAAAAAALL; +#else + return 0x2AAAAAAA; +#endif +} + +/* + Encode src_len bytes at src as a NUL-terminated base64 string in dst; a newline is written after every 76 output characters, the final group is padded with '=', and the return value is always 0. dst is not bounds-checked here. +*/ +static inline int base64_encode(const void *src, size_t src_len, char *dst) +{ + const unsigned char *s = (const unsigned char *)src; + size_t i = 0; + size_t len = 0; + + for (; i < src_len; len += 4) { + unsigned c; + + if (len == 76) { + len = 0; + *dst++ = '\n'; + } + + c = s[i++]; + c <<= 8; + + if (i < src_len) { + c += s[i]; + } + c <<= 8; + i++; + + if (i < src_len) { + c += s[i]; + } + i++; + + *dst++ = base64_table[(c >> 18) & 0x3f]; + *dst++ = base64_table[(c >> 12) & 0x3f]; + + if (i > (src_len + 1)) { + *dst++ = '='; + } else { + *dst++ = base64_table[(c >> 6) & 0x3f]; + } + + if (i > src_len) { + *dst++ = '='; + } else { + *dst++ = base64_table[(c >> 0) & 0x3f]; + } + } + *dst = '\0'; + + return 0; +} + +/** + * Decode a base64 string + * The base64-encoded data in the range ['src','*end_ptr') will be + * decoded and stored starting at 'dst'. The decoding will stop + * after 'len' characters have been read from 'src', or when padding + * occurs in the base64-encoded data.
In either case: if 'end_ptr' is + * non-null, '*end_ptr' will be set to point to the character after + * the last read character, even in the presence of error. + * + * Note: We require that 'dst' is pre-allocated to correct size. + * + * @param src_base Pointer to base64-encoded string + * @param len Length of string at 'src' + * @param dst Pointer to location where decoded data will be stored + * @param end_ptr Pointer to variable that will refer to the character + * after the end of the encoded data that were decoded. + * Can be NULL. + * @param flags flags e.g. allow multiple chunks + * @return Number of bytes written at 'dst', or -1 in case of failure + */ +static inline int64_t base64_decode(const char *src_base, size_t len, void *dst, + const char **end_ptr, int flags) +{ + char *d = (char *)dst; + MY_BASE64_DECODER decoder; + + decoder.src = src_base; + decoder.end = src_base + len; + decoder.error = 0; + decoder.mark = 0; + + for (;;) { + decoder.c = 0; + decoder.state = 0; + + if (my_base64_decoder_getch(&decoder) || my_base64_decoder_getch(&decoder) || + my_base64_decoder_getch(&decoder) || my_base64_decoder_getch(&decoder)) { + break; + } + + *d++ = (decoder.c >> 16) & 0xff; + *d++ = (decoder.c >> 8) & 0xff; + *d++ = (decoder.c >> 0) & 0xff; + + if (decoder.mark) { + d -= decoder.mark; + if (!(flags & MY_BASE64_DECODE_ALLOW_MULTIPLE_CHUNKS)) { + break; + } + decoder.mark = 0; + } + } + + /* Return error if there are more non-space characters */ + decoder.state = 0; + if (!my_base64_decoder_skip_spaces(&decoder)) { + decoder.error = 1; + } + + if (end_ptr != nullptr) { + *end_ptr = decoder.src; + } + + return decoder.error ? 
-1 : (int)(d - (char *)dst); +} + +/** + * binary 解码 + */ +const std::string BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +inline bool is_base64(char c) +{ + return (isalnum(c) || (c == '+') || (c == '/')); +} + +// Base64解码函数 +static inline std::vector base64_decode(const std::string &encoded_string) +{ + size_t in_len = encoded_string.size(); + size_t i = 0; + size_t j = 0; + size_t in_ = 0; + char char_array_4[4], char_array_3[3]; + std::vector ret; + + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { + char_array_4[i++] = encoded_string[in_]; + in_++; + if (i == 4) { + for (i = 0; i < 4; i++) { + char_array_4[i] = BASE64_CHARS.find(char_array_4[i]); + } + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; i < 3; i++) { + ret.push_back(char_array_3[i]); + } + i = 0; + } + } + + if (i) { + for (j = i; j < 4; j++) { + char_array_4[j] = 0; + } + + for (j = 0; j < 4; j++) { + char_array_4[j] = BASE64_CHARS.find(char_array_4[j]); + } + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (j = 0; j < i - 1; j++) { + ret.push_back(char_array_3[j]); + } + } + + return ret; +} diff --git a/binlogconvert/include/utils/decimal.h b/binlogconvert/include/utils/decimal.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c0187f05cda66f59b8857ada63e73a201f7f45 --- /dev/null +++ b/binlogconvert/include/utils/decimal.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2004, 2022, Oracle and/or its affiliates. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + Without limiting anything contained in the foregoing, this file, + which is part of C Driver for MySQL (Connector/C), is also subject to the + Universal FOSS Exception, version 1.0, a copy of which can be found at + http://oss.oracle.com/licenses/universal-foss-exception. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: include/decimal.h +#pragma once + +#include <limits> +#include <type_traits> // NOTE(review): both header names were missing in this text; chosen for std::numeric_limits and std::enable_if used below - confirm + +#include "common/mysql_constant_def.h" + +constexpr int BASE10 = 10; // Base 10, used when computing powers of 10 +constexpr int POW_10_0 = 1; +constexpr int POW_10_1 = 10; +constexpr int POW_10_2 = 100; +constexpr int POW_10_3 = 1000; +constexpr int POW_10_4 = 10000; +constexpr int POW_10_5 = 100000; +constexpr int POW_10_6 = 1000000; +constexpr int POW_10_7 = 10000000; +constexpr int POW_10_8 = 100000000; +constexpr int POW_10_9 = 1000000000; + +static const int dig2bytes[DIG_PER_DEC1 + 1] = {0, 1, 1, 2, 2, 3, 3, 4, 4, 4}; +static const dec1 powers10[DIG_PER_DEC1 + 1] = {POW_10_0, POW_10_1, POW_10_2, POW_10_3, POW_10_4, + POW_10_5, POW_10_6, POW_10_7, POW_10_8, POW_10_9}; + +struct decimal_t { + int intg = 0, frac = 0, len = 9; + bool sign = false; + int32_t *buf = nullptr; // stays null until a caller attaches digit storage +}; + +typedef enum { TRUNCATE = 0, HALF_EVEN, HALF_UP, CEILING, FLOOR } decimal_round_mode; + +template <typename T, int MinDigits, int MaxDigits, typename = void> struct DigitCounter { // binary search over [MinDigits, MaxDigits] for the decimal digit count of x + constexpr int operator()(T x) const + { + constexpr int mid = (MinDigits + MaxDigits) / 2; + constexpr T pivot = pow10(mid); + if (x < pivot) { + return DigitCounter<T, MinDigits, mid>()(x); + } else { + return DigitCounter<T, mid + 1, MaxDigits>()(x); + } + } + +private: + static constexpr T pow10(int n) + { + T x = 1; + for (int i = 0; i < n; ++i) { + x *= BASE10; + } + return x; + } +}; + +template <typename T, int MinDigits, int MaxDigits> +struct DigitCounter<T, MinDigits, MaxDigits, typename std::enable_if<MinDigits == MaxDigits>::type> { // the range has collapsed to one candidate: that is the answer + constexpr int operator()(T) const { return MinDigits; } +}; + +template <typename T> constexpr int count_digits(T x) +{ + return DigitCounter<T, 1, std::numeric_limits<T>::digits10 + 1>()(x); +} + +static inline dec1 mod_by_pow10(dec1 x, int p) +{ + // See div_by_pow10 for rationale.
+ switch (p) { // NOTE(review): cast type restored as uint32_t following upstream MySQL strings/decimal.cc - confirm; the cast is only value-preserving for x >= 0 + case 1: + return static_cast<uint32_t>(x) % POW_10_1; + case 2: + return static_cast<uint32_t>(x) % POW_10_2; + case 3: + return static_cast<uint32_t>(x) % POW_10_3; + case 4: + return static_cast<uint32_t>(x) % POW_10_4; + case 5: + return static_cast<uint32_t>(x) % POW_10_5; + case 6: + return static_cast<uint32_t>(x) % POW_10_6; + case 7: + return static_cast<uint32_t>(x) % POW_10_7; + case 8: + return static_cast<uint32_t>(x) % POW_10_8; + default: + return x % powers10[p]; + } +} + +static inline dec1 div_by_pow10(dec1 x, int p) +{ + switch (p) { // one case per constant divisor; any other p indexes powers10[p] unchecked, so callers must keep p within that table + case 0: + return static_cast<uint32_t>(x) / 1; + case 1: + return static_cast<uint32_t>(x) / POW_10_1; + case 2: + return static_cast<uint32_t>(x) / POW_10_2; + case 3: + return static_cast<uint32_t>(x) / POW_10_3; + case 4: + return static_cast<uint32_t>(x) / POW_10_4; + case 5: + return static_cast<uint32_t>(x) / POW_10_5; + case 6: + return static_cast<uint32_t>(x) / POW_10_6; + case 7: + return static_cast<uint32_t>(x) / POW_10_7; + case 8: + return static_cast<uint32_t>(x) / POW_10_8; + default: + return x / powers10[p]; + } +} + +static inline dec1 *remove_leading_zeroes(const decimal_t *from, int *intg_result) +{ + // Round up intg so that we don't need special handling of the first word. + int intg = ROUND_UP(from->intg) * DIG_PER_DEC1; + + // Remove all the leading words that contain only zeros. + dec1 *buf0 = from->buf; + while (intg > 0 && *buf0 == 0) { + ++buf0; + intg -= DIG_PER_DEC1; + } + + // Now remove all the leading zeros in the first non-zero word, if there is + // a non-zero word.
+ if (intg > 0) { + const int digits = count_digits(*buf0); + intg -= DIG_PER_DEC1 - digits; + } + + *intg_result = intg; + return buf0; +} + +int decimal_is_zero(const decimal_t *from); +int decimal_shift(decimal_t *dec, int shift); +int decimal_round(const decimal_t *from, decimal_t *to, int new_scale, decimal_round_mode mode); +longlong my_strtoll10(const char *nptr, const char **endptr, int *error); +int string2decimal(const char *from, decimal_t *to, const char **end); +int decimal2bin(const decimal_t *from, uchar *to, int precision, int frac); diff --git a/binlogconvert/include/utils/little_endian.h b/binlogconvert/include/utils/little_endian.h new file mode 100644 index 0000000000000000000000000000000000000000..cde4b74e741b5882f620f9941cc9bc097087a0a9 --- /dev/null +++ b/binlogconvert/include/utils/little_endian.h @@ -0,0 +1,187 @@ +/* Copyright (c) 2001, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: include/my_byteorder.h +#pragma once + +#include <cstring> // memcpy + +#include "common/type_def.h" + +constexpr int BYTE_SHIFT_1 = 8; +constexpr int BYTE_SHIFT_2 = 16; +constexpr int BYTE_SHIFT_3 = 24; +constexpr int BYTE_SHIFT_4 = 32; +constexpr int BYTE_SHIFT_5 = 40; + +constexpr int MAX_1_BYTE_LENGTH = 251; +constexpr int MAX_2_BYTE_LENGTH = 65535; +constexpr int MAX_3_BYTE_LENGTH = 16777215; + +static inline void int3store(uchar *T, uint A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); +} + +static inline void int5store(uchar *T, uint64 A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); + *(T + 3) = (uchar)(A >> BYTE_SHIFT_3); + *(T + 4) = (uchar)(A >> BYTE_SHIFT_4); +} + +static inline void int6store(uchar *T, uint64 A) +{ + *(T) = (uchar)(A); + *(T + 1) = (uchar)(A >> BYTE_SHIFT_1); + *(T + 2) = (uchar)(A >> BYTE_SHIFT_2); + *(T + 3) = (uchar)(A >> BYTE_SHIFT_3); + *(T + 4) = (uchar)(A >> BYTE_SHIFT_4); + *(T + 5) = (uchar)(A >> BYTE_SHIFT_5); +} + +static inline void int2store(uchar *T, uint16 A) +{ + memcpy(T, &A, sizeof(A)); // copies host byte order; NOTE(review): little-endian output presumably relies on a little-endian host - confirm +} + +static inline void int4store(uchar *T, uint32 A) +{ + memcpy(T, &A, sizeof(A)); +} + +// Store only 7 bytes of the 8-byte uint64 value +static inline void int7store(uchar *T, uint64 A) +{ + memcpy(T, &A, 7); +} + +static inline void int8store(uchar *T, uint64 A) +{ + memcpy(T, &A, sizeof(A)); +} + +static inline uchar *net_store_length(uchar *packet, uint64 length) +{ + if (length < (uint64)MAX_1_BYTE_LENGTH) { + *packet = (uchar)length; + return packet + 1; + } + /* 251 is reserved for NULL */ + + if (length <= (uint64)MAX_2_BYTE_LENGTH) { // inclusive: 65535 still fits in two bytes + *packet++ = 252; // Indicates that the length is stored in the
next 2 bytes + int2store(packet, (uint)length); + return packet + 2; // 2 bytes for the length + } + if (length <= (uint64)MAX_3_BYTE_LENGTH) { // inclusive: 16777215 still fits in three bytes + *packet++ = 253; // Indicates that the length is stored in the next 3 bytes + int3store(packet, (ulong)length); + return packet + 3; // 3 bytes for the length + } + *packet++ = 254; // Indicates that the length is 16MB or more and will be + // stored in 8 bytes + int8store(packet, length); + return packet + 8; // Return packet incremented by the size of the length + // field (8 bytes) +} + +// used in write_event.cpp; N is 1-based (N = 1 addresses the lowest bit) +static inline void set_N_bit(uchar &f, int N) +{ + f |= (1 << (N - 1)); +} + +static inline void clear_N_bit(uchar &f, int N) +{ + f &= ~(1 << (N - 1)); +} + +// used in decimal.cpp +#define mi_int1store(T, A) *((uchar *)(T)) = (uchar)(A) + +#define mi_int2store(T, A) \ + { \ + uint def_temp = (uint)(A); \ + ((uchar *)(T))[1] = (uchar)(def_temp); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + } +#define mi_int3store(T, A) \ + { /*lint -save -e734 */ \ + ulong def_temp = (ulong)(A); \ + ((uchar *)(T))[2] = (uchar)(def_temp); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + /*lint -restore */} +#define mi_int4store(T, A) \ + { \ + ulong def_temp = (ulong)(A); \ + ((uchar *)(T))[3] = (uchar)(def_temp); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[0] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + } +#define mi_int5store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 = (ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[4] = (uchar)(def_temp); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[1] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[0] = (uchar)(def_temp2); \ + } +#define mi_int6store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 =
(ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[5] = (uchar)(def_temp); \ + ((uchar *)(T))[4] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[2] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[1] = (uchar)(def_temp2); \ + ((uchar *)(T))[0] = (uchar)(def_temp2 >> BYTE_SHIFT_1); \ + } +#define mi_int7store(T, A) \ + { \ + ulong def_temp = (ulong)(A), def_temp2 = (ulong)((A) >> BYTE_SHIFT_4); \ + ((uchar *)(T))[6] = (uchar)(def_temp); \ + ((uchar *)(T))[5] = (uchar)(def_temp >> BYTE_SHIFT_1); \ + ((uchar *)(T))[4] = (uchar)(def_temp >> BYTE_SHIFT_2); \ + ((uchar *)(T))[3] = (uchar)(def_temp >> BYTE_SHIFT_3); \ + ((uchar *)(T))[2] = (uchar)(def_temp2); \ + ((uchar *)(T))[1] = (uchar)(def_temp2 >> BYTE_SHIFT_1); \ + ((uchar *)(T))[0] = (uchar)(def_temp2 >> BYTE_SHIFT_2); \ + } +#define mi_int8store(T, A) \ + { \ + ulong def_temp3 = (ulong)(A), def_temp4 = (ulong)((A) >> BYTE_SHIFT_4); \ + mi_int4store((uchar *)(T) + 0, def_temp4); \ + mi_int4store((uchar *)(T) + 4, def_temp3); \ + } + +static inline void float8store(uchar *V, double M) +{ + memcpy(V, &M, sizeof(double)); +} diff --git a/binlogconvert/include/utils/my_time.h b/binlogconvert/include/utils/my_time.h new file mode 100644 index 0000000000000000000000000000000000000000..c9c5acd16f13b6ed56ae0fecf6e95292e323610f --- /dev/null +++ b/binlogconvert/include/utils/my_time.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2004, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. 
The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + Without limiting anything contained in the foregoing, this file, + which is part of C Driver for MySQL (Connector/C), is also subject to the + Universal FOSS Exception, version 1.0, a copy of which can be found at + http://oss.oracle.com/licenses/universal-foss-exception. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: include/my_time.h +#pragma once + +#include + +#include +#include + +#include "little_endian.h" +#include "sql/field_common_properties.h" + +constexpr const int TIME_MAX_HOUR = 838; +constexpr const int MINS_PER_HOUR = 60; +constexpr const int64_t SECONDS_IN_24H = 86400LL; +constexpr const int MYTIME_MIN_VALUE = 0; +constexpr const bool HAVE_64_BITS_TIME_T = sizeof(time_t) == sizeof(int64_t); + +constexpr const int64_t MYTIME_MAX_VALUE = + HAVE_64_BITS_TIME_T ? 32536771199 : std::numeric_limits::max(); + +enum enum_mysql_timestamp_type { + MYSQL_TIMESTAMP_NONE = -2, + MYSQL_TIMESTAMP_ERROR = -1, + + /// Stores year, month and day components. + MYSQL_TIMESTAMP_DATE = 0, + + /** + Stores all date and time components. + Value is in UTC for `TIMESTAMP` type. + Value is in local time zone for `DATETIME` type. + */ + MYSQL_TIMESTAMP_DATETIME = 1, + + /// Stores hour, minute, second and microsecond. + MYSQL_TIMESTAMP_TIME = 2, + + /** + A temporary type for `DATETIME` or `TIMESTAMP` types equipped with time + zone information. 
After the time zone information is reconciled, the type + is converted to MYSQL_TIMESTAMP_DATETIME. + */ + MYSQL_TIMESTAMP_DATETIME_TZ = 3 +}; + +typedef struct MYSQL_TIME { + unsigned int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + unsigned long second_part = 0; /**< microseconds */ + bool neg = false; + enum enum_mysql_timestamp_type time_type = MYSQL_TIMESTAMP_NONE; // initialised like the other members so a fresh value never carries an indeterminate type + /// The time zone displacement, specified in seconds. + int time_zone_displacement = 0; +} MYSQL_TIME; + +struct my_timeval { + int64_t m_tv_sec; + int64_t m_tv_usec; +}; + +inline long long int my_packed_time_get_frac_part(long long int i) +{ + return (i % (1LL << 24)); // remainder modulo 2^24; the sign follows i +} + +bool check_datetime_range(const MYSQL_TIME &my_time); + +longlong TIME_to_longlong_time_packed(const MYSQL_TIME &my_time); + +void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec); + +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME &my_time); + +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec); + +void my_timestamp_to_binary(const my_timeval *tm, uchar *ptr, uint dec); + +void str_to_time(const char *str, std::size_t length, MYSQL_TIME *l_time); + +void str_to_datetime(const char *str_arg, std::size_t length, MYSQL_TIME *l_time); + +void int_to_date(const char *date_arg, std::size_t length, MYSQL_TIME *l_time); + +void double_to_time(const char *time_arg, std::size_t length, MYSQL_TIME *l_time); + +void datetime_to_timeval(const MYSQL_TIME *ltime, my_timeval *tm); + +longlong TIME_to_longlong_packed(const MYSQL_TIME &my_time); diff --git a/binlogconvert/include/utils/rpl_gtid.h b/binlogconvert/include/utils/rpl_gtid.h new file mode 100644 index 0000000000000000000000000000000000000000..3b6834868cd41d78d1f4e1de3b5e3429e6b49e53 --- /dev/null +++ b/binlogconvert/include/utils/rpl_gtid.h @@ -0,0 +1,312 @@ +/* Copyright (c) 2011, 2022, Oracle and/or its affiliates.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: sql/rpl_gtid.h +#pragma once + +#include +#include +#include +#include +#include + +#include "utils/uuid.h" + +enum enum_gtid_type { + AUTOMATIC_GTID = 0, + ASSIGNED_GTID, + ANONYMOUS_GTID, + UNDEFINED_GTID, + NOT_YET_DETERMINED_GTID, + PRE_GENERATE_GTID +}; + +enum enum_return_status { + /// The function completed successfully. + RETURN_STATUS_OK = 0, + /// The function completed with error but did not report it. + RETURN_STATUS_UNREPORTED_ERROR = 1, + /// The function completed with error and has called my_error. + RETURN_STATUS_REPORTED_ERROR = 2 +}; + +// GTID: {SID, GNO} also known as {uuid, sequence number} +using rpl_sidno = int32_t; +using rpl_gno = int64_t; +using rpl_sid = binary_log::Uuid; + +/// One-past-the-max value of GNO +const rpl_gno GNO_END = INT64_MAX; +/// The length of MAX_GNO when printed in decimal. 
+const int MAX_GNO_TEXT_LENGTH = 19; + +/* + * 准备两个 map,可互查 + */ +class Sid_map { +public: + Sid_map() + : sidno_to_sid_map_() + , sid_to_sidno_map_() + {} + + ~Sid_map() { clear(); } + + enum_return_status clear() + { + sid_to_sidno_map_.clear(); + sidno_to_sid_map_.clear(); + return RETURN_STATUS_OK; + } + + // 有关 map 的操作 + rpl_sidno add_sid(const rpl_sid &sid); + + rpl_sidno get_max_sidno() const { return static_cast(sidno_to_sid_map_.size()); } + + enum_return_status add_node(rpl_sidno sidno, const rpl_sid &sid); + + /** + SID -> SIDNO + 如果不在 sidmap 中,返回 0 + */ + rpl_sidno sid_to_sidno(const rpl_sid &sid) const + { + const auto it = sid_to_sidno_map_.find(sid); + if (it == sid_to_sidno_map_.end()) { + return 0; + } + return it->second->sidno_; + } + + /** + * @brief SIDNO -> SID 的映射, 在 array 里找 + */ + const rpl_sid &sidno_to_sid(rpl_sidno sidno) const + { + const rpl_sid &ret = (sidno_to_sid_map_[sidno - 1])->sid_; + return ret; + } + +private: + /// Node pointed to by both the hash and the array. + struct Node { + rpl_sidno sidno_; + rpl_sid sid_; + }; + + static const unsigned char *sid_map_get_key(const unsigned char *ptr, size_t *length) + { + const Node *node = pointer_cast(ptr); + *length = binary_log::Uuid::BYTE_LENGTH; + return node->sid_.bytes; + } + + /// SIDNO -> SID 的映射用 array 的下标直接索引 + std::vector> sidno_to_sid_map_; + /// SID -> SIDNO 的映射用 hash 表实现 + std::unordered_map, binary_log::Hash_Uuid> sid_to_sidno_map_; +}; + +struct Gtid { + /// SIDNO of this Gtid. + rpl_sidno sidno_; + /// GNO of this Gtid. + rpl_gno gno_; + + /// Set both components to 0. + void clear() + { + sidno_ = 0; + gno_ = 0; + } + + /// Set both components to the given, positive values. + void set(rpl_sidno sidno_arg, rpl_gno gno_arg) + { + // 需要保证 sidno_arg > 0, 0 < gno_arg < GNO_END + sidno_ = sidno_arg; + gno_ = gno_arg; + } + + /** + Return true if sidno is zero (and assert that gno is zero too in + this case). 
+ */ + bool is_empty() const + { + // check that gno is not set inconsistently + if ((sidno_ <= 0 && gno_ != 0) || (sidno_ > 0 && gno_ <= 0)) { + return false; + } + return sidno_ == 0; + } + + /** + The maximal length of the textual representation of a SID, not + including the terminating '\0'. + */ + static const int MAX_TEXT_LENGTH = binary_log::Uuid::TEXT_LENGTH + 1 + MAX_GNO_TEXT_LENGTH; + /** + 返回 parse() 的结果 + */ + static bool is_valid(const char *text); + + int to_string(const rpl_sid &sid, char *buf) const; + + int to_string(const Sid_map *sid_map, char *buf) const; + + /// Returns true if this Gtid has the same sid and gno as 'other'. + bool equals(const Gtid &other) const { return sidno_ == other.sidno_ && gno_ == other.gno_; } + + enum_return_status parse(Sid_map *sid_map, const char *text); +}; + +/** + * @brief 一个具体 statement 的 GTID 表示,可能为 AUTOMATIC, ANONYMOUS, 或者 SID:GNO + */ +struct Gtid_specification { + enum_gtid_type type_; + /** + The GTID: + { SIDNO, GNO } if type == ASSIGNED_GTID; + { 0, 0 } if type == AUTOMATIC or ANONYMOUS. + */ + Gtid gtid_; + + /// Set the type to ASSIGNED_GTID and SID, GNO to the given values. + void set(rpl_sidno sidno, rpl_gno gno) + { + gtid_.set(sidno, gno); + type_ = ASSIGNED_GTID; + } + + /// Set the type to ASSIGNED_GTID and SID, GNO to the given Gtid. + void set(const Gtid >id_param) { set(gtid_param.sidno_, gtid_param.gno_); } + + /// Set the type to AUTOMATIC_GTID. + void set_automatic() { type_ = AUTOMATIC_GTID; } + + /// Set the type to ANONYMOUS_GTID. + void set_anonymous() { type_ = ANONYMOUS_GTID; } + + /// Set the type to NOT_YET_DETERMINED_GTID. + void set_not_yet_determined() { type_ = NOT_YET_DETERMINED_GTID; } + + /// Set to undefined. Must only be called if the type is ASSIGNED_GTID. + void set_undefined() + { + if (type_ != ASSIGNED_GTID) { + return; + } + type_ = UNDEFINED_GTID; + } + + /// Return true if this Gtid_specification is equal to 'other'. 
+ bool equals(const Gtid_specification &other) const + { + return (type_ == other.type_ && (type_ != ASSIGNED_GTID || gtid_.equals(other.gtid_))); + } + + /** + Return true if this Gtid_specification is a ASSIGNED_GTID with the + same SID, GNO as 'other_gtid'. + */ + bool equals(const Gtid &other_gtid) const + { + return type_ == ASSIGNED_GTID && gtid_.equals(other_gtid); + } + + enum_return_status parse(Sid_map *sid_map, const char *text); + /// Returns true if the given string is a valid Gtid_specification. + static bool is_valid(const char *text); + + static const int MAX_TEXT_LENGTH = Gtid::MAX_TEXT_LENGTH; + + int to_string(const rpl_sid *sid, char *buf) const; + + int to_string(const Sid_map *sid_map, char *buf) const; + /** + For a GTID of type ANONYMOUS_GTID or AUTOMATIC_GTID, sid is null + */ +}; + +class Gtid_set { +public: + Gtid_set(Sid_map *sid_map) + : sid_map_(sid_map) {}; + ~Gtid_set(); + + void clear() { if (sid_map_ != nullptr) { sid_map_->clear(); } } // empties the Sid_map this set points at; a null map is left alone + + /** + Encodes this Gtid_set as a binary string. + */ + void encode(unsigned char *buf) const; + + /** + Returns the length of this Gtid_set when encoded using the + encode() function. + */ + size_t get_encoded_length() const; + +public: + Sid_map *sid_map_; +}; + +/* + A Gtid_set that may be null. + Even when it is null a Gtid_set object may still exist; memset(0) is used so that the + malloc-based memory logic can be reused +*/ + +struct Gtid_set_or_null { + /// Pointer to the Gtid_set. + Gtid_set *gtid_set = nullptr; + /// True if this Gtid_set is NOT null (false means "treat as NULL"). + bool is_non_null = false; + + /// Return NULL if this is NULL, otherwise return the Gtid_set. + inline Gtid_set *get_gtid_set() const + { + if (is_non_null && gtid_set == nullptr) { + return nullptr; + } + return is_non_null ? gtid_set : nullptr; + } + + Gtid_set *set_non_null(Sid_map *sm) // switch to non-null: allocate a Gtid_set on first use, otherwise clear() the existing one; this struct never deletes it + { + if (!is_non_null) { + if (gtid_set == nullptr) { + gtid_set = new Gtid_set(sm); + } else { + gtid_set->clear(); + } + } + is_non_null = (gtid_set != nullptr); + return gtid_set; + } + + /// Set this Gtid_set to NULL.
+ inline void set_null() { is_non_null = false; } +}; diff --git a/binlogconvert/include/utils/table_id.h b/binlogconvert/include/utils/table_id.h new file mode 100644 index 0000000000000000000000000000000000000000..32bb9a9867070e05ef8d842514daef7ac2d76ec6 --- /dev/null +++ b/binlogconvert/include/utils/table_id.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2013, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. 
/**
 * Numeric identifier of a table as stored in table-map and rows events.
 *
 * The value is kept in 64 bits, but only ids up to TABLE_ID_MAX (the largest
 * value that fits in 6 bytes) are considered valid.
 */
class Table_id {
public:
    /// Creates the id 0.
    Table_id()
        : m_id_(0)
    {}

    /// Wraps the raw value 'id'; no range check is performed here.
    explicit Table_id(unsigned long long id)
        : m_id_(id)
    {}

    /// Raw numeric value of this id.
    unsigned long long get_id() const { return m_id_; }

    /// True when the value fits into the 6-byte on-disk representation.
    bool is_valid() const { return !(m_id_ > TABLE_ID_MAX); }

    /// Replaces the stored value with 'id'.
    Table_id &operator=(unsigned long long id)
    {
        m_id_ = id;
        return *this;
    }

    bool operator==(const Table_id &tid) const { return tid.m_id_ == m_id_; }

    bool operator!=(const Table_id &tid) const { return tid.m_id_ != m_id_; }

    /* Support implicit type converting from Table_id to unsigned long long */
    operator unsigned long long() const { return m_id_; }

    /// Post-increment: returns the previous id and advances this one,
    /// wrapping to 0 when the current value equals TABLE_ID_MAX.
    Table_id operator++(int)
    {
        const Table_id previous(m_id_);

        if (m_id_ == TABLE_ID_MAX) {
            m_id_ = 0;
        } else {
            m_id_ = m_id_ + 1;
        }

        return previous;
    }

private:
    /* In table map event and rows events, table id is 6 bytes.*/
    static const unsigned long long TABLE_ID_MAX = (~0ULL >> 16);
    uint64_t m_id_;
};
/**
 * Casts an untyped pointer to the pointer type T.
 *
 * Thin wrapper around static_cast, so it only accepts the conversions that
 * static_cast allows from void*.
 *
 * @tparam T target pointer type, e.g. `unsigned char *`.
 * @param  p untyped pointer to convert.
 * @return p converted to T.
 */
template <typename T>
inline T pointer_cast(void *p)
{
    return static_cast<T>(p);
}

/**
 * Overload for pointers to const data.
 *
 * @tparam T target pointer type; it must point to const data, e.g.
 *           `const char *`, because static_cast cannot drop constness.
 * @param  p untyped pointer to const data.
 * @return p converted to T.
 */
template <typename T>
inline const T pointer_cast(const void *p)
{
    return static_cast<T>(p);
}
namespace binary_log {

/**
 * 16-byte UUID value.
 *
 * Per the original notes this identifies the server a transaction originated
 * on and is used as `rpl_sid` in Sid_map::Node. The only data member is the
 * raw byte array. Three textual forms are mentioned by the original notes:
 *   XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
 *   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 *   {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}
 */
struct Uuid {
    /// Number of bytes in a UUID.
    static const size_t BYTE_LENGTH = 16;
    /** The data for this Uuid. */
    unsigned char bytes[BYTE_LENGTH];

    /// Set to all zeros.
    void clear() { memset(bytes, 0, BYTE_LENGTH); }

    /// Copies the given 16-byte data to this UUID.
    void copy_from(const unsigned char *data) { memcpy(bytes, data, BYTE_LENGTH); }

    /// Copies the given UUID object to this UUID.
    void copy_from(const Uuid &data)
    {
        copy_from(static_cast<const unsigned char *>(data.bytes));
    }

    /// Copies this UUID's 16 bytes to the given buffer.
    void copy_to(unsigned char *data) const { memcpy(data, bytes, BYTE_LENGTH); }

    /// Returns true if this UUID is equal the given UUID.
    bool equals(const Uuid &other) const { return memcmp(bytes, other.bytes, BYTE_LENGTH) == 0; }

    /// Length of the canonical textual form, without the terminating '\0'.
    static const size_t TEXT_LENGTH = 36;
    /// Number of bits in a UUID.
    static const size_t BIT_LENGTH = 128;
    /// Number of dash-separated sections in the textual form.
    static const int NUMBER_OF_SECTIONS = 5;
    /// Number of bytes in each section (defined out of line).
    static const int bytes_per_section[NUMBER_OF_SECTIONS];
    /// Lookup table indexed by character value (defined out of line).
    static const int hex_to_byte[256];

    /**
      Whether the given text is a valid UUID; per the original note this is
      implemented through parse().
    */
    static bool is_valid(const char *string, size_t len);

    /**
      Parses the given string as a UUID and stores it in this object.
    */
    int parse(const char *string, size_t len);

    /**
      Parses the given string and stores it as a binary UUID string; per the
      original note this is implemented through read_section().
    */
    static int parse(const char *in_string, size_t len, const unsigned char *out_binary_string);

    /**
      Parses one section of a UUID string.
    */
    static bool read_section(int section_len, const char **section_str,
                             const unsigned char **out_binary_str);

    /// Writes the textual form to 'buf' (defined out of line).
    size_t to_string(char *buf) const;
    /// Writes the textual form of 'bytes_arg' to 'buf' (defined out of line).
    static size_t to_string(const unsigned char *bytes_arg, char *buf);

    /// Returns the textual form as a std::string.
    std::string to_string() const
    {
        char buf[TEXT_LENGTH + 1];
        to_string(buf);
        return buf;
    }

    /// Prints the textual form followed by a newline to stdout.
    void print() const
    {
        char buf[TEXT_LENGTH + 1];
        to_string(buf);
        printf("%s\n", buf);
    }
};

/// Hash functor so Uuid can be used as a key in unordered containers.
struct Hash_Uuid {
    size_t operator()(const Uuid &uuid) const
    {
        // Hash the raw bytes (embedded zero bytes included) through the
        // std::string hasher.
        const char *raw = reinterpret_cast<const char *>(uuid.bytes);
        return std::hash<std::string>()(std::string(raw, Uuid::BYTE_LENGTH));
    }
};

/// Byte-wise equality, see Uuid::equals().
inline bool operator==(const Uuid &a, const Uuid &b)
{
    return a.equals(b);
}

} // namespace binary_log
+target_link_libraries(sql2bl Threads::Threads stdc++fs) + +# Include directories for sql2bl +target_include_directories(sql2bl + PUBLIC + $ + $ +) diff --git a/binlogconvert/src/basic_ostream.cpp b/binlogconvert/src/basic_ostream.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c49659a30230c7d1e4eb32e1236fa53e6592552f --- /dev/null +++ b/binlogconvert/src/basic_ostream.cpp @@ -0,0 +1,63 @@ +#include "basic_ostream.h" +#include "common/logging.h" + +bool Binlog_ofile::write(const uchar *buffer, my_off_t length) +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return false; + } + + if (length == 0) { + return true; + } + + m_pipeline_head_->write(reinterpret_cast(buffer), length); + + if (!m_pipeline_head_->good()) { + return false; + } + + m_position_ += length; + return true; +} + +RC Binlog_ofile::seek(my_off_t position) +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return RC::FILE_NOT_OPENED; + } + + m_pipeline_head_->seekp(position); + if (!m_pipeline_head_->good()) { + return RC::IOERR_SEEK; + } + m_position_ = position; + return RC::SUCCESS; +} + +RC Binlog_ofile::sync() +{ + if (m_pipeline_head_ == nullptr) { + LOG_ERROR("binlog file stream may be not open..."); + return RC::FILE_NOT_OPENED; + } + m_pipeline_head_->flush(); + return m_pipeline_head_->good() ? 
RC::SUCCESS : RC::IOERR_SYNC; +} + +RC Binlog_ofile::flush() +{ + return sync(); +} + +Binlog_ofile::Binlog_ofile(const char *binlog_name, RC &rc) +{ + // position 不能直接初始化为 0,可能当前要写入的文件是 继续最后一个文件写 + if (open(binlog_name)) { + rc = RC::SUCCESS; + } else { + rc = RC::IOERR_OPEN; + } +} diff --git a/binlogconvert/src/binlog.cpp b/binlogconvert/src/binlog.cpp new file mode 100644 index 0000000000000000000000000000000000000000..986cf1dc8eed7b2eeb02b28260ca731231dc5b80 --- /dev/null +++ b/binlogconvert/src/binlog.cpp @@ -0,0 +1,69 @@ +#include "binlog.h" + +MYSQL_BIN_LOG::MYSQL_BIN_LOG(const char *file_name, uint64_t file_size, RC &rc) + : max_size_(file_size) + , atomic_log_state_(LOG_CLOSED) + , bytes_written_(0) +{ + // 检查文件名是否为空 + if (!file_name) { + LOG_ERROR("file_name should not be null."); + rc = RC::INVALID_ARGUMENT; + return; + } + + std::fill(file_name_, file_name_ + FN_REFLEN, '\0'); + // 即使传入的文件名不足 FN_REFLEN - 1,file_name_ 也会以空字符结尾 + std::strncpy(file_name_, file_name, FN_REFLEN - 1); + file_name_[FN_REFLEN - 1] = '\0'; // Null-terminate to prevent overflow + + rc = RC::SUCCESS; +} + +RC MYSQL_BIN_LOG::open() +{ + // 1: 打开文件流 + RC ret; + m_binlog_file_ = std::make_unique(file_name_, ret); + + if (ret == RC::IOERR_OPEN) { + atomic_log_state_ = LOG_CLOSED; + LOG_ERROR("Failed to open binlog file."); + return ret; + } + + atomic_log_state_ = LOG_OPENED; + + // Step 2: 如果打开的是一个空文件,就会先写一个 magic number 和 一个 fde + if (m_binlog_file_->is_empty()) { + bool w_ok = m_binlog_file_->write(reinterpret_cast(BINLOG_MAGIC), + BIN_LOG_HEADER_SIZE); + + auto fde = std::make_unique(BINLOG_VERSION, SERVER_VERSION_STR); + bool w_ok2 = write_event_to_binlog(fde.get()); + + if (!w_ok || !w_ok2) { + LOG_ERROR("Failed to write magic number and fde to binlog start"); + return RC::IOERR_WRITE; + } + } + + return RC::SUCCESS; +} + +RC MYSQL_BIN_LOG::close() +{ + if (atomic_log_state_ == LOG_OPENED) { + atomic_log_state_ = LOG_CLOSED; + } + reset_bytes_written(); + 
m_binlog_file_->sync(); + LOG_INFO(" MYSQL_BIN_LOG [%s] exit......", file_name_); + m_binlog_file_->close(); + return RC::SUCCESS; +} + +bool MYSQL_BIN_LOG::write_event_to_binlog(AbstractEvent *ev) +{ + return ev->write(this->m_binlog_file_.get()); +} \ No newline at end of file diff --git a/binlogconvert/src/buffer_reader.cpp b/binlogconvert/src/buffer_reader.cpp new file mode 100644 index 0000000000000000000000000000000000000000..12ad5333df63be99d21e8d4b421e2ac93afa0377 --- /dev/null +++ b/binlogconvert/src/buffer_reader.cpp @@ -0,0 +1,26 @@ +#include "buffer_reader.h" +#include + +BufferReader::BufferReader(const char *buffer, unsigned long long length) noexcept + : buffer_(buffer) + , ptr_(buffer) + , limit_(length) +{} + +void BufferReader::forward(size_t length) +{ + if (ptr_ + length > buffer_ + limit_) { + throw std::out_of_range("Attempt to forward beyond buffer limit"); + } + ptr_ += length; +} + +unsigned long long BufferReader::position() const noexcept +{ + return ptr_ >= buffer_ ? ptr_ - buffer_ : limit_; +} + +bool BufferReader::valid() const noexcept +{ + return ptr_ < buffer_ + limit_; +} diff --git a/binlogconvert/src/common/rc.cpp b/binlogconvert/src/common/rc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6d43d48d1cd654a2e15d1b64504aad3469a057a --- /dev/null +++ b/binlogconvert/src/common/rc.cpp @@ -0,0 +1,31 @@ +// +// Created by Coonger on 2024/11/2. 
+// + +#include "common/rc.h" + +const char *strrc(RC rc) +{ +#define DEFINE_RC(name) \ + case RC::name: { \ + return #name; \ + } break; + + switch (rc) { + DEFINE_RCS; + default: { + return "unknown"; + } + } +#undef DEFINE_RC +} + +bool LOFT_SUCC(RC rc) +{ + return rc == RC::SUCCESS; +} + +bool LOFT_FAIL(RC rc) +{ + return rc != RC::SUCCESS; +} diff --git a/binlogconvert/src/common/thread_pool_executor.cpp b/binlogconvert/src/common/thread_pool_executor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a6974171ce180cbe22f6cb8358db9f8f51cfe5dd --- /dev/null +++ b/binlogconvert/src/common/thread_pool_executor.cpp @@ -0,0 +1,220 @@ +#include "common/thread_pool_executor.h" + +#include +#include + +#include "common/logging.h" +#include "common/simple_queue.h" +#include "common/thread_util.h" + +using namespace std; + +namespace common { + +RC ThreadPoolExecutor::init(const char *name, int core_pool_size, int max_pool_size, + long keep_alive_time_ms) +{ + unique_ptr>> queue_ptr(new (nothrow) + SimpleQueue>()); + return init(name, core_pool_size, max_pool_size, keep_alive_time_ms, std::move(queue_ptr)); +} + +RC ThreadPoolExecutor::init(const char *name, int core_pool_size, int max_pool_size, + long keep_alive_time_ms, + unique_ptr>> &&work_queue) +{ + if (state_ != State::NEW) { + LOG_ERROR("invalid state. state=%d", static_cast(state_)); + return RC::INVALID_ARGUMENT; + } + + if (core_pool_size < 0 || max_pool_size <= 0 || core_pool_size > max_pool_size) { + LOG_ERROR("invalid argument. 
core_pool_size=%d, max_pool_size=%d", core_pool_size, + max_pool_size); + return RC::INVALID_ARGUMENT; + } + + if (name != nullptr) { + pool_name_ = name; + } + + core_pool_size_ = core_pool_size; + max_pool_size_ = max_pool_size; + keep_alive_time_ms_ = chrono::milliseconds(keep_alive_time_ms); + work_queue_ = std::move(work_queue); + + while (static_cast(threads_.size()) < core_pool_size_) { + if (create_thread(true /* core_thread */) != RC::SUCCESS) { + LOG_ERROR("create thread failed"); + return RC::INVALID_ARGUMENT; + } + } + + state_ = State::RUNNING; + return RC::SUCCESS; +} + +ThreadPoolExecutor::~ThreadPoolExecutor() +{ + if (state_ != State::TERMINATED) { + shutdown(); + await_termination(); + } +} + +RC ThreadPoolExecutor::shutdown() +{ + if (state_ != State::RUNNING) { + return RC::SUCCESS; + } + + state_ = State::TERMINATING; + return RC::SUCCESS; +} + +RC ThreadPoolExecutor::execute(const function &callable) +{ + unique_ptr task_ptr = make_unique(callable); + return this->execute(std::move(task_ptr)); +} + +RC ThreadPoolExecutor::execute(unique_ptr &&task) +{ + if (state_ != State::RUNNING) { + LOG_ERROR("[%s] cannot submit task. 
state=%d", pool_name_, static_cast(state_)); + return RC::INVALID_ARGUMENT; + } + + RC ret = work_queue_->push(std::move(task)); + int task_size = work_queue_->size(); + if (task_size > pool_size() - active_count()) { + extend_thread(); + } + return ret; +} + +RC ThreadPoolExecutor::await_termination() +{ + if (state_ != State::TERMINATING) { + return RC::INVALID_ARGUMENT; + } + + while (threads_.size() > 0) { + this_thread::sleep_for(200ms); + } + return RC::SUCCESS; +} + +void ThreadPoolExecutor::thread_func() +{ + LOG_INFO("[%s] thread started", pool_name_.c_str()); + + int ret = thread_set_name(pool_name_); + if (ret != 0) { + LOG_ERROR("[%s] set thread name failed", pool_name_); + } + + lock_.lock(); + auto iter = threads_.find(this_thread::get_id()); + if (iter == threads_.end()) { + std::ostringstream oss; + oss << std::this_thread::get_id(); + LOG_ERROR("[%s] cannot find thread state of %s", pool_name_, oss.str().c_str()); + return; + } + ThreadData &thread_data = iter->second; + lock_.unlock(); + + using Clock = chrono::steady_clock; + + chrono::time_point idle_deadline = Clock::now(); + if (!thread_data.core_thread && keep_alive_time_ms_.count() > 0) { + idle_deadline += keep_alive_time_ms_; + } + + /// 这里使用最粗暴的方式检测线程是否可以退出了 + /// 但是实际上,如果当前的线程个数比任务数要多,或者差不多,而且任务执行都很快的时候, + /// 并不需要保留这么多线程 + while (thread_data.core_thread || Clock::now() < idle_deadline) { + unique_ptr task; + + RC ret = work_queue_->pop(task); + if (RC::SUCCESS == ret && task) { + thread_data.idle = false; + ++active_count_; + task->run(); + --active_count_; + thread_data.idle = true; + ++task_count_; + + if (keep_alive_time_ms_.count() > 0) { + idle_deadline = Clock::now() + keep_alive_time_ms_; + } + } + if (state_ != State::RUNNING && work_queue_->size() == 0) { + break; + } + } + + thread_data.terminated = true; + thread_data.thread_ptr->detach(); + delete thread_data.thread_ptr; + thread_data.thread_ptr = nullptr; + + lock_.lock(); + threads_.erase(this_thread::get_id()); + 
lock_.unlock(); + + LOG_INFO("[%s] thread exit", pool_name_.c_str()); +} + +RC ThreadPoolExecutor::create_thread(bool core_thread) +{ + lock_guard guard(lock_); + return create_thread_locked(core_thread); +} + +RC ThreadPoolExecutor::create_thread_locked(bool core_thread) +{ + thread *thread_ptr = new (nothrow) thread(&ThreadPoolExecutor::thread_func, this); + if (thread_ptr == nullptr) { + LOG_ERROR("create thread failed"); + return RC::INVALID_ARGUMENT; + } + + ThreadData thread_data; + thread_data.core_thread = core_thread; + thread_data.idle = true; + thread_data.terminated = false; + thread_data.thread_ptr = thread_ptr; + threads_[thread_ptr->get_id()] = thread_data; + + if (static_cast(threads_.size()) > largest_pool_size_) { + largest_pool_size_ = static_cast(threads_.size()); + } + return RC::SUCCESS; +} + +RC ThreadPoolExecutor::extend_thread() +{ + lock_guard guard(lock_); + + // 超过最大线程数,不再创建 + if (pool_size() >= max_pool_size_) { + return RC::SUCCESS; + } + // 任务数比空闲线程数少,不创建新线程 + if (work_queue_->size() <= pool_size() - active_count()) { + return RC::SUCCESS; + } + + return create_thread_locked(false /* core_thread */); +} + +void ThreadPoolExecutor::log_status() const +{ + LOG_DEBUG("[%s] Current pool size: %d. Active threads: %d. 
namespace common {

/**
 * Sets the name of the calling thread.
 *
 * The name is copied into a 16-byte buffer first, so anything longer than
 * 15 characters is truncated.
 *
 * @param name requested thread name; must not be null.
 * @return 0 on success; the non-zero result of pthread_setname_np on
 *         failure; -1 when 'name' is null or the platform is neither Apple
 *         nor Linux.
 */
int thread_set_name(const char *name)
{
    // "%s" with a null pointer is undefined behaviour.
    if (name == nullptr) {
        return -1;
    }

    const int namelen = 16;
    char buf[namelen];
    snprintf(buf, namelen, "%s", name);

#if defined(__APPLE__)
    return pthread_setname_np(buf);
#elif defined(__linux__)
    return pthread_setname_np(pthread_self(), buf);
#else
    // No known API on this platform; report failure instead of falling off
    // the end of a non-void function.
    (void)buf;
    return -1;
#endif
}

} // namespace common
+// + +#include "events/abstract_event.h" + +#include "common/logging.h" +#include "utils/little_endian.h" + +time_t AbstractEvent::get_common_header_time() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec; +} + +uint32 AbstractEvent::write_common_header_to_memory(uchar *buf) +{ + // ts类型,和写 log_pos 一样是在 制作完后续的 event data body + // 写完才确定的时间 + int4store(buf, common_header_->timestamp_); // 不算微秒 + buf[EVENT_TYPE_OFFSET] = type_code_; + int4store(buf + SERVER_ID_OFFSET, SERVER_ID); + int4store(buf + EVENT_LEN_OFFSET, static_cast(common_header_->data_written_)); + int4store(buf + LOG_POS_OFFSET, static_cast(common_header_->log_pos_)); + int2store(buf + FLAGS_OFFSET, common_header_->flags_); + + return LOG_EVENT_HEADER_LEN; +} + +bool AbstractEvent::write_common_header(Basic_ostream *ostream, size_t event_data_length) +{ + uchar header[LOG_EVENT_HEADER_LEN]; + + common_header_->data_written_ = sizeof(header) + event_data_length; + + common_header_->log_pos_ = ostream->get_position() + common_header_->data_written_; + + write_common_header_to_memory(header); + + LOG_INFO("current event common-header write pos: %llu", ostream->get_position()); + + return ostream->write(header, LOG_EVENT_HEADER_LEN); +} + +size_t AbstractEvent::write_common_header_to_buffer(uchar *buffer) +{ + common_header_->data_written_ = LOG_EVENT_HEADER_LEN + get_data_size(); + // 先用占位符填充 log_pos_ + common_header_->log_pos_ = POSITION_PLACEHOLDER; + + int4store(buffer, common_header_->timestamp_); + buffer[EVENT_TYPE_OFFSET] = type_code_; + int4store(buffer + SERVER_ID_OFFSET, SERVER_ID); + int4store(buffer + EVENT_LEN_OFFSET, common_header_->data_written_); + int4store(buffer + LOG_POS_OFFSET, common_header_->log_pos_); + int2store(buffer + FLAGS_OFFSET, common_header_->flags_); + + return LOG_EVENT_HEADER_LEN; +} diff --git a/binlogconvert/src/events/control_events.cpp b/binlogconvert/src/events/control_events.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..70ae706f1d27caf3eb561488c53424bfad5df548 --- /dev/null +++ b/binlogconvert/src/events/control_events.cpp @@ -0,0 +1,302 @@ +#include "events/control_events.h" + +#include +#include + +#include "utils/little_endian.h" + +/************************************************************************** + Format_description_event methods +**************************************************************************/ + +// 在每个构造函数里,并没有实例化 common_header_ 和 common_footer_ 成员变量 +Format_description_event::Format_description_event(uint8 binlog_ver, const char *server_ver) + : AbstractEvent(FORMAT_DESCRIPTION_EVENT) + , binlog_version_(BINLOG_VERSION) +{ + if (binlog_ver == MYSQL_BINLOG_VERSION) { /* MySQL 5.0 and above */ + memset(server_version_, 0, ST_SERVER_VER_LEN); + // 直接写入 + strncpy(server_version_, server_ver, ST_SERVER_VER_LEN); + + common_header_len_ = LOG_EVENT_HEADER_LEN; + number_of_event_types = LOG_EVENT_TYPES; + + static uint8 server_event_header_length[] = { + 0, + QUERY_HEADER_LEN, + STOP_HEADER_LEN, + ROTATE_HEADER_LEN, + INTVAR_HEADER_LEN, + 0, + 0, + 0, + APPEND_BLOCK_HEADER_LEN, + 0, + DELETE_FILE_HEADER_LEN, + 0, + RAND_HEADER_LEN, + USER_VAR_HEADER_LEN, + FORMAT_DESCRIPTION_HEADER_LEN, + XID_HEADER_LEN, + BEGIN_LOAD_QUERY_HEADER_LEN, + EXECUTE_LOAD_QUERY_HEADER_LEN, + TABLE_MAP_HEADER_LEN, + 0, + 0, + 0, + ROWS_HEADER_LEN_V1, /* WRITE_ROWS_EVENT_V1 */ + ROWS_HEADER_LEN_V1, /* UPDATE_ROWS_EVENT_V1 */ + ROWS_HEADER_LEN_V1, /* DELETE_ROWS_EVENT_V1 */ + INCIDENT_HEADER_LEN, + 0, /* HEARTBEAT_LOG_EVENT */ + IGNORABLE_HEADER_LEN, + IGNORABLE_HEADER_LEN, + ROWS_HEADER_LEN_V2, + ROWS_HEADER_LEN_V2, + ROWS_HEADER_LEN_V2, + Gtid_event::POST_HEADER_LENGTH, /* GTID_EVENT */ + Gtid_event::POST_HEADER_LENGTH, /* ANONYMOUS_GTID_EVENT */ + IGNORABLE_HEADER_LEN, + TRANSACTION_CONTEXT_HEADER_LEN, + VIEW_CHANGE_HEADER_LEN, + XA_PREPARE_HEADER_LEN, + ROWS_HEADER_LEN_V2, + TRANSACTION_PAYLOAD_EVENT, + 0 /* HEARTBEAT_LOG_EVENT_V2 */ + }; + 
+ post_header_len_.insert(post_header_len_.begin(), server_event_header_length, + server_event_header_length + number_of_event_types); + } else { /* Includes binlog version < 4 */ + } + + // AbstarctEvent 在写 common_header + // 时,会使用成员变量,type_code_,故先不填充没事 + this->common_header_ = std::make_unique(get_common_header_time()); +} + +Format_description_event::~Format_description_event() = default; + +// 只负责写 event-data:包括 post-header 和 event-body +bool Format_description_event::write(Basic_ostream *ostream) +{ + // fde 只有 post-header + size_t rec_size = AbstractEvent::FORMAT_DESCRIPTION_HEADER_LEN + BINLOG_CHECKSUM_ALG_DESC_LEN; + uchar buff[rec_size]; + + int2store(buff + ST_BINLOG_VER_OFFSET, binlog_version_); + memcpy((char *)buff + ST_SERVER_VER_OFFSET, server_version_, ST_SERVER_VER_LEN); + create_timestamp_ = get_fde_create_time(); + int4store(buff + ST_CREATED_OFFSET, static_cast(create_timestamp_)); + buff[ST_COMMON_HEADER_LEN_OFFSET] = LOG_EVENT_HEADER_LEN; // store 1 byte + + size_t number_of_events = static_cast(post_header_len_.size()); + + memcpy((char *)buff + ST_COMMON_HEADER_LEN_OFFSET + 1, &post_header_len_.front(), + number_of_events); + buff[FORMAT_DESCRIPTION_HEADER_LEN] = (uint8_t)BINLOG_CHECKSUM_ALG_OFF; + + return write_common_header(ostream, rec_size) && ostream->write(buff, rec_size); +} + +time_t Format_description_event::get_fde_create_time() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec; // Return time in seconds +} + +/************************************************************************** + Gtid_event methods +**************************************************************************/ + +Gtid_event::Gtid_event(int64 last_committed_arg, int64 sequence_number_arg, + bool may_have_sbr_stmts_arg, uint64 original_commit_timestamp_arg, + uint64 immediate_commit_timestamp_arg, uint32 original_server_version_arg, + uint32 immediate_server_version_arg) + : AbstractEvent(GTID_LOG_EVENT) + , 
last_committed_(last_committed_arg) + , sequence_number_(sequence_number_arg) + , may_have_sbr_stmts_(may_have_sbr_stmts_arg) + , original_commit_timestamp_(original_commit_timestamp_arg) + , immediate_commit_timestamp_(immediate_commit_timestamp_arg) + , transaction_length_(0) + , original_server_version_(original_server_version_arg) + , immediate_server_version_(immediate_server_version_arg) +{ + // 默认当前 txn 是 Anonymous + spec_.set_anonymous(); + spec_.gtid_.clear(); + sid_.clear(); + + time_t o_ts = static_cast(original_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(o_ts); + Log_event_type event_type = + (spec_.type_ == ANONYMOUS_GTID ? Log_event_type::ANONYMOUS_GTID_LOG_EVENT + : Log_event_type::GTID_LOG_EVENT); + this->type_code_ = event_type; +} + +size_t Gtid_event::get_data_size() +{ + // 默认 txn_length = 0, 省略 net_length_size(transaction_length) 大小 + // 只有考虑 commit_group_ticket 参数,才会计算 txn_length + return POST_HEADER_LENGTH + get_commit_timestamp_length() + 1 + get_server_version_length(); +} + +uint32 Gtid_event::write_post_header_to_memory(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + /* Encode the GTID flags */ + uchar gtid_flags = 0; // 1 byte + gtid_flags |= may_have_sbr_stmts_ ? 
Gtid_event::FLAG_MAY_HAVE_SBR : 0; + *ptr_buffer = gtid_flags; + ptr_buffer += ENCODED_FLAG_LENGTH; + + sid_.copy_to(ptr_buffer); // 16 byte + ptr_buffer += ENCODED_SID_LENGTH; + + int8store(ptr_buffer, spec_.gtid_.gno_); // 8 byte + ptr_buffer += ENCODED_GNO_LENGTH; + + *ptr_buffer = LOGICAL_TIMESTAMP_TYPECODE; + ptr_buffer += LOGICAL_TIMESTAMP_TYPECODE_LENGTH; // 1 byte + + int8store(ptr_buffer, last_committed_); // 8 byte + int8store(ptr_buffer + 8, sequence_number_); // 8 byte + ptr_buffer += LOGICAL_TIMESTAMP_LENGTH; + + return POST_HEADER_LENGTH; +} + +size_t Gtid_event::write_data_header_to_buffer(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + // Encode the GTID flags + uchar gtid_flags = 0; // 1 byte + gtid_flags |= may_have_sbr_stmts_ ? Gtid_event::FLAG_MAY_HAVE_SBR : 0; + *ptr_buffer = gtid_flags; + ptr_buffer += ENCODED_FLAG_LENGTH; + + // Copy SID + sid_.copy_to(ptr_buffer); // 16 bytes + ptr_buffer += ENCODED_SID_LENGTH; + + // Store GNO + int8store(ptr_buffer, spec_.gtid_.gno_); // 8 bytes + ptr_buffer += ENCODED_GNO_LENGTH; + + // Logical timestamp typecode + *ptr_buffer = LOGICAL_TIMESTAMP_TYPECODE; // 1 byte + ptr_buffer += LOGICAL_TIMESTAMP_TYPECODE_LENGTH; + + // Store last committed and sequence number + int8store(ptr_buffer, last_committed_); // 8 bytes + int8store(ptr_buffer + 8, sequence_number_); // 8 bytes + ptr_buffer += LOGICAL_TIMESTAMP_LENGTH; + + return POST_HEADER_LENGTH; // Total header length +} + +size_t Gtid_event::write_data_body_to_buffer(uchar *buffer) +{ + uchar *ptr_buffer = buffer; + + // Immediate commit timestamp with flag + unsigned long long immediate_commit_timestamp_with_flag = immediate_commit_timestamp_; + if (immediate_commit_timestamp_ != original_commit_timestamp_) { + immediate_commit_timestamp_with_flag |= (1ULL << ENCODED_COMMIT_TIMESTAMP_LENGTH); + } else { // Clear highest bit (MSB) + immediate_commit_timestamp_with_flag &= ~(1ULL << ENCODED_COMMIT_TIMESTAMP_LENGTH); + } + int7store(ptr_buffer, 
immediate_commit_timestamp_with_flag); // 7 bytes + ptr_buffer += IMMEDIATE_COMMIT_TIMESTAMP_LENGTH; + + // Original commit timestamp if different + if (immediate_commit_timestamp_ != original_commit_timestamp_) { + int7store(ptr_buffer, original_commit_timestamp_); // 7 bytes + ptr_buffer += ORIGINAL_COMMIT_TIMESTAMP_LENGTH; + } + + // Transaction length + uchar *ptr_after_length = net_store_length(ptr_buffer, transaction_length_); + ptr_buffer = ptr_after_length; + + // Immediate server version with flag + uint32_t immediate_server_version_with_flag = immediate_server_version_; + if (immediate_server_version_ != original_server_version_) { + immediate_server_version_with_flag |= (1ULL << ENCODED_SERVER_VERSION_LENGTH); + } else { // Clear MSB + immediate_server_version_with_flag &= ~(1ULL << ENCODED_SERVER_VERSION_LENGTH); + } + int4store(ptr_buffer, immediate_server_version_with_flag); // 4 bytes + ptr_buffer += IMMEDIATE_SERVER_VERSION_LENGTH; + + // Original server version if different + if (immediate_server_version_ != original_server_version_) { + int4store(ptr_buffer, original_server_version_); // 4 bytes + ptr_buffer += ORIGINAL_SERVER_VERSION_LENGTH; + } + + // Return the total written body length + return ptr_buffer - buffer; +} + +Gtid_event::~Gtid_event() = default; + +/************************************************************************** + Xid_event methods +**************************************************************************/ +Xid_event::Xid_event(uint64_t xid_arg, uint64 original_commit_timestamp_arg) + : AbstractEvent(XID_EVENT) + , xid_(xid_arg) +{ + time_t o_ts = static_cast(original_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(o_ts); +} + +size_t Xid_event::write_data_header_to_buffer(uchar *buffer) +{ + return XID_HEADER_LEN; +} +size_t Xid_event::write_data_body_to_buffer(uchar *buffer) +{ + memcpy(buffer, (uchar *)&xid_, sizeof(xid_)); + return sizeof(xid_); +} + 
/**
 * Common state of the bit-stream helpers: a buffer pointer plus the index of
 * the next bit to process.
 *
 * @tparam T  pointer element type used internally.
 * @tparam UT alternative element type accepted by set_ptr(), reinterpreted
 *            as T.
 */
template <typename T, typename UT>
class Bit_stream_base {
protected:
    /// Buffer the bits are read from or written to.
    T *m_ptr;
    /// Index of the next bit, counted from the start of the buffer.
    unsigned int m_current_bit;

public:
    Bit_stream_base(T *ptr)
        : m_ptr(ptr)
        , m_current_bit(0)
    {}

    /**
      Set the buffer pointer.
      @param ptr Pointer where bits will be read or written.
    */
    void set_ptr(T *ptr) { m_ptr = ptr; }

    /**
      Set the buffer pointer, using an unsigned datatype.
      @param ptr Pointer where bits will be read or written.
    */
    void set_ptr(UT *ptr) { m_ptr = reinterpret_cast<T *>(ptr); }

    /// @return the current position.
    unsigned int tell() const { return m_current_bit; }
};

/**
 * @brief Writes single bits into a byte buffer, lowest bit of each byte
 *        first.
 *
 * `unsigned char` / `unsigned int` are spelled out here; they are assumed to
 * be the types behind the project's `uchar` / `uint` aliases.
 */
class Bit_writer : public Bit_stream_base<char, unsigned char> {
public:
    Bit_writer(char *ptr = nullptr)
        : Bit_stream_base<char, unsigned char>(ptr)
    {}

    Bit_writer(unsigned char *ptr)
        : Bit_writer(reinterpret_cast<char *>(ptr))
    {}

    /**
      Write the next bit and move the write position one bit forward.
      @param set_to_on If true, set the bit to 1, otherwise set it to 0.
    */
    void set(bool set_to_on)
    {
        const unsigned int byte = m_current_bit / 8;
        const unsigned int bit_within_byte = m_current_bit % 8;
        m_current_bit++;
        if (bit_within_byte == 0) {
            // The first bit of a byte overwrites the whole byte, which also
            // clears the seven bits that follow.
            m_ptr[byte] = set_to_on ? 1 : 0;
        } else if (set_to_on) {
            m_ptr[byte] |= 1 << bit_within_byte;
        }
    }
};
=========================m_column_view_ 初始化, 制作 表头============== + + m_coltype_ = std::make_unique(colcnt); + for (size_t i = 0; i < colcnt; ++i) { + m_coltype_[i] = 0; + } + long pos = 0; + for (auto &field : m_column_view_) { + m_coltype_[pos++] = field->binlog_type(); + LOG_INFO("init coltype_: field->binlog_type() = %d", field->binlog_type()); + } + + uchar cbuf[sizeof(m_colcnt_) + 1]; + uchar *cbuf_end; + cbuf_end = net_store_length(cbuf, (size_t)m_colcnt_); + m_data_size_ += (cbuf_end - cbuf) + m_colcnt_; // COLCNT and column types + + // 3. 得到每个 Field 的元数据 + m_field_metadata_ = std::make_unique(m_colcnt_ * FIELD_METADATA_SIZE); + memset(m_field_metadata_.get(), 0, m_colcnt_ * FIELD_METADATA_SIZE); + m_field_metadata_size_ = save_field_metadata(); // 同时也填充了 m_field_metadata_ + if (m_field_metadata_size_ < MAX_METADATA_SIZE) { + m_data_size_ += m_field_metadata_size_ + 1; + } else { + m_data_size_ += m_field_metadata_size_ + 3; // +3 for the extended metadata size + // when m_field_metadata_size_ >= 251 + } + + ///////////////////////////// + uint num_null_bytes = (m_colcnt_ + 7) / 8; + m_data_size_ += num_null_bytes; + + m_null_bits_ = std::make_unique(num_null_bytes); + memset(m_null_bits_.get(), 0, num_null_bytes); + Bit_writer bit_writer{this->m_null_bits_.get()}; + + for (auto &field : m_column_view_) { + bit_writer.set(field->is_nullable()); + } + + LOG_INFO("table_map_event data size: %zu", m_data_size_); + + time_t o_ts = static_cast(original_commit_timestamp_arg / 1000000); + this->common_header_ = std::make_unique(o_ts); +} + +Table_map_event::~Table_map_event() = default; + +int Table_map_event::save_field_metadata() +{ + int index = 0; + for (auto &field : m_column_view_) { + // 时间类型的 date 没有元数据字段 + if (field->type() == MYSQL_TYPE_DATE) + continue; + index += field->save_field_metadata(&m_field_metadata_[index]); + } + + return index; +} + +size_t Table_map_event::write_data_header_to_buffer(uchar *buffer) +{ + if (!m_table_id_.is_valid()) { + 
return -1; + } + + // 写入 table id 和 flags + int6store(buffer + TM_MAPID_OFFSET, m_table_id_.get_id()); + int2store(buffer + TM_FLAGS_OFFSET, m_flags); + + return AbstractEvent::TABLE_MAP_HEADER_LEN; +} + +size_t Table_map_event::write_data_body_to_buffer(uchar *buffer) +{ + if (m_dbnam_.empty() || m_tblnam_.empty()) { + return -1; + } + + uchar *current_pos = buffer; + + // 写入数据库名长度 + uchar *const dbuf_end = net_store_length(current_pos, (size_t)m_dblen_); + current_pos = dbuf_end; + + // 写入数据库名 + memcpy(current_pos, m_dbnam_.c_str(), m_dblen_ + 1); + current_pos += m_dblen_ + 1; + + // 写入表名长度 + uchar *const tbuf_end = net_store_length(current_pos, (size_t)m_tbllen_); + current_pos = tbuf_end; + + // 写入表名 + memcpy(current_pos, m_tblnam_.c_str(), m_tbllen_ + 1); + current_pos += m_tbllen_ + 1; + + // 写入列数 + uchar *const cbuf_end = net_store_length(current_pos, (size_t)m_colcnt_); + current_pos = cbuf_end; + + // 写入列类型 + memcpy(current_pos, m_coltype_.get(), m_colcnt_); + current_pos += m_colcnt_; + + // 写入字段元数据大小 + uchar *const mbuf_end = net_store_length(current_pos, m_field_metadata_size_); + current_pos = mbuf_end; + + // 写入字段元数据 + memcpy(current_pos, m_field_metadata_.get(), m_field_metadata_size_); + current_pos += m_field_metadata_size_; + + // 写入空值位图 + size_t null_bits_len = (m_colcnt_ + 7) / 8; + memcpy(current_pos, m_null_bits_.get(), null_bits_len); + current_pos += null_bits_len; + + // 返回写入的总字节数 + return current_pos - buffer; +} diff --git a/binlogconvert/src/events/statement_events.cpp b/binlogconvert/src/events/statement_events.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f019690621b685c5aeb1d45976e0941f107fd339 --- /dev/null +++ b/binlogconvert/src/events/statement_events.cpp @@ -0,0 +1,141 @@ +// +// Created by Coonger on 2024/10/20. 
+//
+#include "events/statement_events.h"
+
+#include "common/logging.h"
+#include "events/abstract_event.h"
+#include "utils/little_endian.h"
+#include "utils/template_utils.h"
+#include
+
+/******************************************************************************
+                     Query_event methods
+******************************************************************************/
+
+/**
+ * @brief Build a QUERY event.
+ *
+ * Stores the raw pointers (the strings are not copied here), derives db_len_ from
+ * @p db_arg and pre-computes status_vars_len_.
+ *
+ * @param query_arg Statement text; @p query_length bytes of it are written later.
+ * @param catalog_arg Stored in catalog_.
+ * @param db_arg Database name, may be null (db_len_ is then 0).
+ * @param ddl_xid_arg Stored in ddl_xid; status variables are only emitted when it differs
+ *        from INVALID_XID.
+ * @param thread_id_arg Stored in thread_id_.
+ * @param errcode Stored in error_code_.
+ * @param original_commit_timestamp_arg Divided by 1000000 before it is handed to the common
+ *        header (presumably microseconds -> seconds; TODO confirm the unit).
+ */
+Query_event::Query_event(const char *query_arg, const char *catalog_arg, const char *db_arg,
+                         uint64 ddl_xid_arg, uint32 query_length, uint64 thread_id_arg,
+                         int32 errcode, uint64 original_commit_timestamp_arg)
+    : AbstractEvent(QUERY_EVENT)
+    , query_(query_arg)
+    , db_(db_arg)
+    , ddl_xid(ddl_xid_arg)
+    , catalog_(catalog_arg)
+    , thread_id_(thread_id_arg)
+    , db_len_(0)
+    , error_code_(errcode)
+    , status_vars_len_(0)
+    , q_len_(query_length)
+    , flags2_inited(true)
+    , sql_mode_inited(true)
+    , charset_inited(true)
+    , explicit_defaults_ts(TERNARY_UNSET)
+{
+    if (db_arg == nullptr) {
+        db_len_ = 0;
+    } else {
+        db_len_ = strlen(db_arg);
+    }
+
+    query_exec_time_ = EXEC_TIME;
+    LOG_INFO("db_len_ = %zu, query_len = %zu", db_len_, q_len_);
+
+    calculate_status_vars_len();
+
+    time_t o_ts = static_cast(original_commit_timestamp_arg / 1000000);
+    this->common_header_ = std::make_unique(o_ts);
+}
+
+/**
+ * @brief Write the fixed QUERY post-header fields into @p buffer.
+ *
+ * The status-vars length slot is not written here; write_data_body_to_buffer fills it.
+ *
+ * @return AbstractEvent::QUERY_HEADER_LEN.
+ */
+size_t Query_event::write_data_header_to_buffer(uchar *buffer)
+{
+    // Write the fixed part of the Query event header
+    int4store(buffer + Q_THREAD_ID_OFFSET, thread_id_);
+    int4store(buffer + Q_EXEC_TIME_OFFSET, query_exec_time_);
+    buffer[Q_DB_LEN_OFFSET] = (unsigned char)db_len_; // db_len_ is truncated to one byte
+    int2store(buffer + Q_ERR_CODE_OFFSET, error_code_);
+
+    return AbstractEvent::QUERY_HEADER_LEN;
+}
+
+/**
+ * @brief Serialize status variables, the database name (+ NUL) and the query text.
+ * @return Number of bytes written starting at @p buffer.
+ */
+size_t Query_event::write_data_body_to_buffer(uchar *buffer)
+{
+    uchar *current_pos = buffer;
+    uchar *start_of_status = current_pos;
+
+    // Write the status variables
+    if (ddl_xid != INVALID_XID) {
+        if (flags2_inited) {
+            *current_pos++ = Q_FLAGS2_CODE;
+            int4store(current_pos, flags2);
+            current_pos += FLAGS2_OFFSET;
+        }
+        if (charset_inited) {
+            *current_pos++ = Q_CHARSET_CODE;
+            int2store(current_pos, client_charset_);
+            int2store(current_pos + 2, connection_collation_);
+            int2store(current_pos + 4, server_collation_);
+            current_pos += CHARSET_OFFSET;
+        }
+
+        // Write the database name (as the "updated db names" status variable)
+        if (db_ != nullptr) {
+            *current_pos++ = Q_UPDATED_DB_NAMES;
+            *current_pos++ = EMPTY_DB_INDICATOR;
+            strncpy((char *)current_pos, db_, strlen(db_) + 1);
+            current_pos += strlen(db_) + 1;
+        }
+
+        if (query_start_usec_used_) {
+            *current_pos++ = Q_MICROSECONDS;
+            int3store(current_pos, common_header_->timestamp_ % TS_MICROSECOND_PART);
+            current_pos += MICROSECONDS_OFFSET;
+        }
+
+        // This repeats the enclosing condition, so it is always true at this point.
+        if (ddl_xid != INVALID_XID) {
+            *current_pos++ = Q_DDL_LOGGED_WITH_XID;
+            int8store(current_pos, ddl_xid);
+            current_pos += DDL_XID_OFFSET;
+        }
+    }
+
+    // Update the status variable length
+    status_vars_len_ = current_pos - start_of_status;
+    // NOTE(review): this stores to an address *before* `buffer`; it assumes the post-header
+    // written by write_data_header_to_buffer sits immediately in front of the body in the
+    // same allocation -- confirm with the caller.
+    int2store(buffer - AbstractEvent::QUERY_HEADER_LEN + Q_STATUS_VARS_LEN_OFFSET,
+              status_vars_len_);
+
+    // Write the database name
+    if (db_) {
+        memcpy(current_pos, db_, db_len_);
+    }
+    current_pos += db_len_;
+    *current_pos++ = 0; // database name terminator
+
+    // Write the query text
+    memcpy(current_pos, query_, q_len_);
+    current_pos += q_len_;
+
+    return current_pos - buffer;
+}
+
+/**
+ * @brief Pre-compute status_vars_len_ using the same conditions as the body writer.
+ */
+void Query_event::calculate_status_vars_len()
+{
+    size_t len = 0;
+
+    if (ddl_xid != INVALID_XID) {
+        if (flags2_inited)
+            len += QUERY_STATUS_FLAG_OFFSET + FLAGS2_OFFSET;
+
+        if (charset_inited)
+            len += QUERY_STATUS_FLAG_OFFSET + CHARSET_OFFSET;
+
+        if (db_)
+            len += 2 * QUERY_STATUS_FLAG_OFFSET + strlen(db_) + 1; // +1 for the terminator (the original comment said '\n'; the writer copies the NUL)
+
+        if (query_start_usec_used_)
+            len += QUERY_STATUS_FLAG_OFFSET + MICROSECONDS_OFFSET;
+        if (ddl_xid != INVALID_XID)
+            len += QUERY_STATUS_FLAG_OFFSET + DDL_XID_OFFSET;
+    }
+
+    status_vars_len_ = len;
+}
diff --git a/binlogconvert/src/events/write_event.cpp b/binlogconvert/src/events/write_event.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..715c38f78268a88b3552fa083e4abe15904010bb
--- /dev/null
+++ b/binlogconvert/src/events/write_event.cpp
@@ -0,0 +1,234 @@
+#include "events/write_event.h"
+#include
+
+/**
+ * @brief Build a rows event (WRITE / UPDATE / DELETE, selected by @p type).
+ * @param tid Table id stored in m_table_id.
+ * @param wid Passed to Set_width().
+ * @param flag Passed to Set_flags().
+ * @param type Event type; also forwarded to the AbstractEvent base.
+ * @param original_commit_timestamp_arg Divided by 1000000 before it is handed to the common
+ *        header (presumably microseconds -> seconds; TODO confirm the unit).
+ */
+Rows_event::Rows_event(const Table_id &tid, unsigned long wid, uint16 flag, Log_event_type type,
+                       uint64 original_commit_timestamp_arg)
+    : m_table_id(tid)
+    , m_type(type)
+    , AbstractEvent(type)
+{
+    // Pre-allocate the row buffers in the constructor, sized for 30 columns * 8 bytes,
+    // NOTE(review): INITIAL_SIZE is 64, which does not match the 30 * 8 in the comment above.
+    const size_t INITIAL_SIZE = 64;
+    m_rows_before_buf = std::make_unique(INITIAL_SIZE);
+    m_rows_after_buf = std::make_unique(INITIAL_SIZE);
+    m_before_capacity = INITIAL_SIZE;
+    m_after_capacity = INITIAL_SIZE;
+    before_data_size_used = 0;
+    after_data_size_used = 0;
+
+    this->Set_width(wid);
+    this->Set_flags(flag);
+    cols_init();
+
+    time_t o_ts = static_cast(original_commit_timestamp_arg / 1000000);
+    this->common_header_ = std::make_unique(o_ts);
+}
+
+Rows_event::~Rows_event() = default;
+
+/**
+ * @brief Allocate both column-image bitmaps (Get_N() bytes each) with every bit set.
+ */
+void Rows_event::cols_init()
+{
+    int N = Get_N();
+    columns_after_image = std::make_unique(N);
+    memset(columns_after_image.get(), 0xff, N * sizeof(uchar));
+    columns_before_image = std::make_unique(N);
+    memset(columns_before_image.get(), 0xff, N * sizeof(uchar));
+}
+
+/**
+ * @brief Grow @p buf so it can hold @p needed_size bytes, keeping the first
+ *        @p current_size bytes.
+ * @param buf Buffer to grow; replaced by a new allocation when growth is needed.
+ * @param capacity In/out: current capacity, updated to the new capacity.
+ * @param current_size Number of valid bytes to preserve.
+ * @param needed_size Required capacity.
+ */
+void Rows_event::buf_resize(std::unique_ptr &buf, size_t &capacity, size_t current_size,
+                            size_t needed_size)
+{
+    if (needed_size <= capacity) {
+        return; // the existing capacity is sufficient
+    }
+
+    // New capacity: the larger of needed_size and twice the current capacity
+    size_t new_capacity = std::max(needed_size, capacity * 2);
+    auto new_buf = std::make_unique(new_capacity);
+
+    // Copy the existing data
+    if (current_size > 0 && buf) {
+        memcpy(new_buf.get(), buf.get(), current_size);
+    }
+
+    buf = std::move(new_buf);
+    capacity = new_capacity;
+}
+
+/**
+ * @brief Fill @p t (sign, intg, frac, buf, len) from the double @p num.
+ *
+ * NOTE(review): `buf` is allocated with new[] and handed to t.buf; nothing in this function
+ * frees it, so ownership presumably passes to the caller -- confirm who deletes it. The
+ * array length is precision / 9 + precision % 9 and `j` is never checked against it.
+ *
+ * @param num Value to convert; its sign goes to t.sign and the magnitude is used.
+ * @param t Output structure.
+ * @param precision Only used to size the digit buffer.
+ * @param frac Number of decimal places shifted into the integer domain.
+ */
+void Rows_event::double2demi(double num, decimal_t &t, int precision, int frac)
+{
+    if (num < 0) {
+        num = -num;
+        t.sign = true;
+    } else {
+        t.sign = false;
+    }
+    t.intg = 0;
+    t.frac = 0;
+    int32_t *buf = new int32_t[precision / 9 + precision % 9];
+    ulonglong intg = num;
+    ulonglong intg2 = num;
+    double frac1 = num - intg;
+    ulonglong fracg;
+    ulonglong fracg2;
+    int j = 0;
+    // Shift `frac` decimal places of the fractional part into the integer domain.
+    for (int i = 0; i < frac; i++) {
+        frac1 *= 10;
+    }
+    fracg = frac1;
+    while (fracg <= FRAC_DIGITS && fracg != 0) {
+        fracg *= 10;
+    }
+    fracg2 = fracg;
+    // Emit the integer part, then the fractional part, in base-INT_DIGITS words,
+    // least-significant word first.
+    while (intg) {
+        buf[j++] = intg % INT_DIGITS;
+        intg /= INT_DIGITS;
+    }
+    while (fracg) {
+        buf[j++] = fracg % INT_DIGITS;
+        fracg /= INT_DIGITS;
+    }
+    // Count the decimal digits of each part.
+    while (intg2) {
+        t.intg += 1;
+        intg2 /= 10;
+    }
+    while (fracg2) {
+        t.frac += 1;
+        fracg2 /= 10;
+    }
+    t.buf = buf;
+    t.len = MAX_PRECISION;
+}
+
+/**
+ * @brief Compute the serialized size of this event's post-header plus body.
+ * @return ROWS_HEADER_LEN_V2 + packed width length + row data sizes + the column-image and
+ *         null-bitmap bytes that apply to m_type.
+ */
+size_t Rows_event::calculate_event_size()
+{
+    size_t event_size = 0;
+    size_t n = Get_N();
+    // Scratch buffer, used only to measure the packed length of m_width.
+    uchar sbuf[sizeof(m_width) + 1];
+    uchar *const sbuf_end = net_store_length(sbuf, (size_t)m_width);
+    event_size += ROWS_HEADER_LEN_V2;
+    event_size += before_data_size_used;
+    event_size += after_data_size_used;
+    event_size += (sbuf_end - sbuf);
+    if (m_type == Log_event_type::WRITE_ROWS_EVENT) {
+        event_size += n;
+        event_size += (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+    } else if (m_type == Log_event_type::DELETE_ROWS_EVENT) {
+        event_size += n;
+        event_size += (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+    } else if (m_type == Log_event_type::UPDATE_ROWS_EVENT) {
+        event_size += n;
+        event_size += (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+        event_size += n;
+        event_size += (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+    }
+    return event_size;
+}
+
+/**
+ * @brief Write the rows-event post-header (table id, flags, variable-header length).
+ * @return ROWS_HEADER_LEN_V2.
+ */
+size_t Rows_event::write_data_header_to_buffer(uchar *buffer)
+{
+    int6store(buffer + ROWS_MAPID_OFFSET, m_table_id.get_id());
+    int2store(buffer + ROWS_FLAGS_OFFSET, m_flags);
+    uint extra_row_info_payloadlen = EXTRA_ROW_INFO_HEADER_LENGTH;
+    int2store(buffer + ROWS_VHLEN_OFFSET, extra_row_info_payloadlen);
+
+    return ROWS_HEADER_LEN_V2;
+}
+
+/**
+ * @brief Serialize the rows-event body into @p buffer.
+ *
+ * Order written: packed width, before column image (UPDATE/DELETE), after column image
+ * (UPDATE/WRITE), before null bitmap + row data (UPDATE/DELETE), after null bitmap + row
+ * data (UPDATE/WRITE).
+ *
+ * NOTE(review): row_bitmap_before / row_bitmap_after are only allocated in this function
+ * when the matching rows_* vector is non-empty, yet get() is passed to std::reverse and
+ * memcpy unconditionally; the null_* loops also index a bitmap sized from rows_*.size().
+ * Confirm that rows_* is never empty and that null_*.size() never exceeds rows_*.size().
+ *
+ * @return Number of bytes written.
+ */
+size_t Rows_event::write_data_body_to_buffer(uchar *buffer)
+{
+    uchar *current_pos = buffer;
+
+    // Write the width
+    uchar sbuf[sizeof(m_width) + 1];
+    uchar *const sbuf_end = net_store_length(sbuf, (size_t)m_width);
+    memcpy(current_pos, sbuf, sbuf_end - sbuf);
+    current_pos += (sbuf_end - sbuf);
+
+    // Before image for DELETE and UPDATE events
+    if (m_type == Log_event_type::UPDATE_ROWS_EVENT ||
+        m_type == Log_event_type::DELETE_ROWS_EVENT) {
+        int N = Get_N();
+        if (rows_before.size() != 0) {
+            memset(columns_before_image.get(), 0, N * sizeof(uchar));
+        }
+
+        // Bits are set counting from the last byte; the bitmap is reversed before copying.
+        for (int i = 0; i < rows_before.size(); i++) {
+            int n = N - ((rows_before[i] - 1) / BIT_PER_BYTE + 1);
+            set_N_bit(*(columns_before_image.get() + n),
+                      (rows_before[i] - 1) % BIT_PER_BYTE + 1); // Set the bit position in the byte
+        }
+
+        if (rows_before.size() != 0) {
+            size_t row_bitmap_size = (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+            row_bitmap_before = std::make_unique(row_bitmap_size);
+            std::memset(row_bitmap_before.get(), 0x00, row_bitmap_size * sizeof(uchar));
+        }
+
+        std::reverse(columns_before_image.get(), columns_before_image.get() + N);
+        memcpy(current_pos, columns_before_image.get(), N);
+        current_pos += N;
+    }
+
+    // After image for WRITE and UPDATE events
+    if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::WRITE_ROWS_EVENT) {
+        int N = Get_N();
+        if (rows_after.size() != 0) {
+            memset(columns_after_image.get(), 0, N * sizeof(uchar));
+        }
+
+        for (int i = 0; i < rows_after.size(); i++) {
+            assert(rows_after[i] <= m_width);
+            int n = N - ((rows_after[i] - 1) / BIT_PER_BYTE + 1);
+            set_N_bit(*(columns_after_image.get() + n), (rows_after[i] - 1) % BIT_PER_BYTE + 1);
+        }
+
+        if (rows_after.size() != 0) {
+            size_t row_bitmap_size = (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+            row_bitmap_after = std::make_unique(row_bitmap_size);
+            memset(row_bitmap_after.get(), 0x00, row_bitmap_size * sizeof(uchar));
+        }
+        std::reverse(columns_after_image.get(), columns_after_image.get() + N);
+        memcpy(current_pos, columns_after_image.get(), N);
+        current_pos += N;
+    }
+
+    // Write the before data
+    if (m_type == Log_event_type::UPDATE_ROWS_EVENT ||
+        m_type == Log_event_type::DELETE_ROWS_EVENT) {
+        size_t N = (rows_before.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+        for (int i = 0; i < null_before.size(); i++) {
+            if (null_before[i]) {
+                int n = N - (i / BIT_PER_BYTE + 1);
+                set_N_bit(*(row_bitmap_before.get() + n), i % BIT_PER_BYTE + 1);
+            }
+        }
+        std::reverse(row_bitmap_before.get(), row_bitmap_before.get() + N);
+        memcpy(current_pos, row_bitmap_before.get(), N);
+        current_pos += N;
+
+        memcpy(current_pos, m_rows_before_buf.get(), before_data_size_used);
+        current_pos += before_data_size_used;
+    }
+
+    // Write the after data
+    if (m_type == Log_event_type::UPDATE_ROWS_EVENT || m_type == Log_event_type::WRITE_ROWS_EVENT) {
+        size_t N = (rows_after.size() + BIT_PER_BYTE - 1) / BIT_PER_BYTE;
+        for (int i = 0; i < null_after.size(); i++) {
+            if (null_after[i]) {
+                int n = N - (i / BIT_PER_BYTE + 1);
+                set_N_bit(*(row_bitmap_after.get() + n),
+                          i % BIT_PER_BYTE + 1); // Set the bit corresponding to the
+                                                 // null bit in the row bitmap
+            }
+        }
+        std::reverse(row_bitmap_after.get(), row_bitmap_after.get() + N);
+        memcpy(current_pos, row_bitmap_after.get(), N);
+        current_pos += N;
+
+        memcpy(current_pos, m_rows_after_buf.get(), after_data_size_used);
+        current_pos += after_data_size_used;
+    }
+
+    return current_pos - buffer;
+}
diff --git a/binlogconvert/src/log_file.cpp b/binlogconvert/src/log_file.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..459727b9afdafef5480f6bc5575b106e3286bc37
--- /dev/null
+++ b/binlogconvert/src/log_file.cpp
@@ -0,0 +1,861 @@
+//
+// Created by Coonger on 2024/11/10.
+//
+
+#include // std::from_chars
+#include // std::strcmp
+#include // ::open
+#include // for std::setw, std::setfill
+#include
+#include // std::string_view
+#include // for stat struct and stat() function
+
+#include "buffer_reader.h"
+#include "common/logging.h"
+#include "log_file.h"
+
+// Definitions at global scope
+static std::string index_path_storage;
+const char* LogFileManager::default_binlog_index_file_name_ = nullptr;
+
+/******************************************************************************
+                            RedoLogFileReader
+******************************************************************************/
+
+/**
+ * @brief Open @p filename read-only and remember the descriptor in fd_.
+ * @return RC::SUCCESS, or RC::IOERR_OPEN when ::open fails.
+ */
+auto RedoLogFileReader::open(const char *filename) -> RC
+{
+    filename_ = filename;
+    fd_ = ::open(filename, O_RDONLY);
+    if (fd_ < 0) {
+        LOG_ERROR("open file failed. filename=%s, error=%s", filename, strerror(errno));
+        return RC::IOERR_OPEN;
+    }
+
+    LOG_INFO("open file success. filename=%s, fd=%d", filename, fd_);
+    return RC::SUCCESS;
+}
+
+/**
+ * @brief Close fd_ if it is open and reset it to -1.
+ * @return RC::SUCCESS, or RC::FILE_NOT_OPENED when fd_ is negative.
+ */
+auto RedoLogFileReader::close() -> RC
+{
+    if (fd_ < 0) {
+        return RC::FILE_NOT_OPENED;
+    }
+
+    ::close(fd_);
+    fd_ = -1;
+    return RC::SUCCESS;
+}
+
+/**
+ * @brief Read a whole file into a freshly allocated, NUL-terminated buffer.
+ * @param fileName Path opened with fopen(..., "rb").
+ * @return {buffer, number of bytes read}; {nullptr, 0} when the file cannot be opened or a
+ *         read error occurs.
+ */
+auto RedoLogFileReader::readFromFile(const std::string &fileName)
+    -> std::pair, size_t>
+{
+    FILE *file = fopen(fileName.c_str(), "rb");
+    if (file == nullptr) {
+        std::cerr << "Failed to open file " << fileName << std::endl;
+        return {nullptr, 0}; // null pointer and size 0
+    }
+
+    const size_t bufferSize = IO_SIZE; // bytes per read (4KB according to the original comment)
+    char buffer[bufferSize];
+    size_t readSize = 0;
+    size_t oneRead = 0;
+
+    // Growable buffer; `data` is owned by a unique_ptr
+    std::unique_ptr data;
+    size_t dataCapacity = 0;
+
+    // Read the file chunk by chunk
+    while (!feof(file)) {
+        memset(buffer, 0, sizeof(buffer));
+        oneRead = fread(buffer, 1, sizeof(buffer), file);
+        if (ferror(file)) {
+            std::cerr << "Failed to read data from " << fileName << std::endl;
+            fclose(file);
+            return {nullptr, 0};
+        }
+
+        // Reallocate when this chunk would exceed the capacity of `data`
+        if (readSize + oneRead > dataCapacity) {
+            dataCapacity = (readSize + oneRead) * 2;
+            std::unique_ptr newData = std::make_unique(dataCapacity);
+
+            if (data) {
+                memcpy(newData.get(), data.get(), readSize);
+            }
+            data = std::move(newData);
+        }
+
+        memcpy(data.get() + readSize, buffer, oneRead);
+        readSize += oneRead;
+    }
+
+    fclose(file);
+
+    // Shrink to exactly the number of bytes read, plus a trailing '\0'
+    // NOTE(review): for an empty file `data` is still null here, so the memcpy below gets a
+    // null source pointer with length 0.
+    std::unique_ptr result = std::make_unique(readSize + 1);
+    memcpy(result.get(), data.get(), readSize);
+    result[readSize] = '\0';
+
+    return {std::move(result), readSize};
+}
+
+/******************************************************************************
+                            BinLogFileWriter
+   The writer's open and close operate on the file stream directly rather than on an fd
+******************************************************************************/
+/**
+ * @brief Create the underlying binlog object for @p filename and open it.
+ * @param filename Path of the binlog file; also stored with trailing newlines stripped.
+ * @param max_file_size Forwarded to the binlog object.
+ * @return The result of bin_log_->open(), or RC::IOERR_OPEN when construction failed.
+ */
+RC BinLogFileWriter::open(const char *filename, size_t max_file_size)
+{
+    filename_ = filename;
+    clean_filename_ = filename_;
+    while (!clean_filename_.empty() && clean_filename_.back() == '\n') {
+        clean_filename_.pop_back();
+    }
+    // This only initializes the file information; the file stream is not opened yet
+    // NOTE(review): `ret` is uninitialized when passed in; presumably the constructor takes
+    // it by reference and always assigns it -- confirm.
+    RC ret;
+    bin_log_ = std::make_unique(filename, max_file_size, ret);
+    // Make sure a failure is returned as an error instead of carrying on
+    if (ret != RC::SUCCESS || bin_log_ == nullptr) {
+        LOG_ERROR("Failed to create binlog file: %s", filename);
+        bin_log_.reset(); // make sure the pointer is cleared
+        return RC::IOERR_OPEN;
+    }
+    // Open at the file's current writable position, i.e. continue writing
+    return bin_log_->open(); // RC::SUCCESS when everything went well
+}
+
+/**
+ * @brief Close the underlying binlog object, if there is one.
+ * @return RC::FILE_NOT_OPENED when bin_log_ is null, otherwise the result of
+ *         bin_log_->close().
+ */
+RC BinLogFileWriter::close()
+{
+    // Called from next_file, which calls close first, so returning right away is fine here.
+    // bin_log_ is only initialized once open() has been called for the first time.
+    if (bin_log_ == nullptr) {
+        LOG_DEBUG("At first time revoke last_file or next file");
+        return RC::FILE_NOT_OPENED;
+    }
+
+    return bin_log_->close(); // RC::SUCCESS when everything went well
+}
+
+/**
+ * @brief Write one event through the underlying binlog object.
+ * @return RC::SUCCESS when write_event_to_binlog reports success, else RC::IOERR_WRITE.
+ */
+RC BinLogFileWriter::write(AbstractEvent &event)
+{
+    // NOTE(review): unlike close(), bin_log_ is not checked for null here.
+    return bin_log_->write_event_to_binlog(&event) ? RC::SUCCESS : RC::IOERR_WRITE;
+}
+
+/******************************************************************************
+                            LogFileManager
+******************************************************************************/
+
+/**
+ * @brief Create the reader/writer/transform helpers and start the three background threads
+ *        (task collector, writer, log cleaner).
+ */
+LogFileManager::LogFileManager()
+    : file_reader_(std::make_unique())
+    , file_writer_(std::make_unique())
+    , transform_manager_(std::make_unique())
+    , ring_buffer_(std::make_shared>(RING_BUFFER_CAPACITY))
+{
+    // Start the task-collector thread
+    task_collector_thread_ = std::thread(&LogFileManager::process_tasks, this);
+
+    // Start the dedicated writer thread
+    result_queue_.stop_flag_ = &stop_flag_; // point ResultQueue's stop_flag_ at ours
+
+    writer_thread_ =
+        std::thread([this] { result_queue_.process_writes(file_writer_.get(), this); });
+
+    cleaner_thread_ = std::thread(&LogFileManager::clean_logs, this);
+    // Other initialization (e.g. setting the initial state) can go here
+
+    start_time_ = std::chrono::high_resolution_clock::now();
+}
+
+/**
+ * @brief Run shutdown(), then close the file writer and the index / checkpoint descriptors.
+ */
+LogFileManager::~LogFileManager()
+{
+    // Callers may add an explicit wait at the end of main(); if the transform tasks have
+    // already been waited for, an explicit call is not needed
+    shutdown(); // release resources explicitly
+
+    if (thread_pool_) {
+        thread_pool_->log_status();
+        LOG_INFO("[ Thread pool ] All task has done...");
+    }
+
+    if (file_writer_) {
+        LOG_INFO("[ logFileManager Deconstruct ] Closing file writer...");
+        file_writer_->close();
+        file_writer_.reset(); // Ensure destruction
+    }
+
+
+    try {
+        // 4. Finally close index_fd_
+        if (index_fd_ >= 0) {
+            ::close(index_fd_);
+            index_fd_ = -1;
+        }
+
+        if (ckp_fd_ >= 0) {
+            ::close(ckp_fd_);
+            ckp_fd_ = -1;
+        }
+    } catch (const std::exception &e) {
+        LOG_ERROR("Exception in ~LogFileManager: %s", e.what());
+    }
+    LOG_DEBUG(" global rc: %d", static_cast(global_runtime_status_.load(std::memory_order_relaxed)));
+}
+
+/**
+ * @brief Read one CkpInfo record from the control file and unpack it.
+ * @param control_path Path of the control file.
+ * @param ckp Out: ckp_info.checkpoint as a string.
+ * @param target_filename Out: ckp_info.log_file_name as a string.
+ * @param offset Out: ckp_info.position.
+ * @return RC::SUCCESS (also for an empty file, in which case the outputs are untouched),
+ *         RC::IOERR_OPEN when the file cannot be opened, RC::ControlFile_Corrupt on a
+ *         short or failed read.
+ */
+RC LogFileManager::read_from_ckp_file_content(const std::filesystem::path &control_path,
+                                              std::string &ckp,
+                                              std::string &target_filename,
+                                              uint64 &offset)
+{
+    CkpInfo ckp_info;
+
+    int fd = open(control_path.c_str(), O_RDONLY);
+    if (fd == -1) {
+        LOG_ERROR("Failed to open control file %s: %s", control_path.c_str(), strerror(errno));
+        return RC::IOERR_OPEN;
+    }
+
+    // Read the whole struct
+    ssize_t read_len = read(fd, &ckp_info, sizeof(CkpInfo));
+    close(fd);
+
+    if (read_len != sizeof(CkpInfo)) {
+        // First ingestion call: the control file is still empty, so skip processing
+        if (read_len == 0) {
+            LOG_DEBUG("revoke api3, control file is empty now...waiting for revoking api2");
+            return RC::SUCCESS;
+        } else { // possibly (maliciously) corrupted
+            // NOTE(review): errno is read after close(fd) and also for short reads that did
+            // not fail, so the logged message may not describe the actual problem.
+            LOG_ERROR("Failed to read control file %s: %s, may be corrupt", control_path.c_str(), strerror(errno));
+            return RC::ControlFile_Corrupt;
+        }
+    }
+
+    // Convert into the output parameters
+    // NOTE(review): std::string(const char*) reads up to a NUL; this assumes both character
+    // arrays in CkpInfo are NUL-terminated -- confirm.
+    ckp = std::string(ckp_info.checkpoint);
+    target_filename = std::string(ckp_info.log_file_name);
+    offset = static_cast(ckp_info.position);
+
+    return RC::SUCCESS;
+}
+
+/// @return true when @p str ends with @p suffix.
+static inline bool ends_with(const std::string& str, const std::string& suffix) {
+    if (str.length() < suffix.length()) {
+        return false;
+    }
+    return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
+}
+
+/**
+ * @brief Parse the numeric suffix of a binlog file name into @p fileno.
+ * @param filename File name expected to start with file_prefix_.
+ * @param fileno Out: parsed file number; left untouched for ".index" / ".controlinfo" files.
+ * @return RC::SUCCESS, or RC::INVALID_ARGUMENT when the prefix does not match or the suffix
+ *         does not parse.
+ */
+RC LogFileManager::get_fileno_from_filename(const std::string &filename, uint32_t &fileno)
+{
+    if (filename.compare(0, strlen(file_prefix_), file_prefix_) != 0) {
+        LOG_ERROR("invalid log file name: cannot calc file_no. filename=[%s]", filename.c_str());
+        return RC::INVALID_ARGUMENT;
+    }
+
+    // NOTE(review): returns SUCCESS without setting fileno; a caller that treats SUCCESS as
+    // "fileno is valid" will register the index / control file under whatever value fileno
+    // had before the call -- verify against the directory scan in SetBinlogPath.
+    if (ends_with(filename, ".index") || ends_with(filename, ".controlinfo")) {
+        return RC::SUCCESS;
+    }
+
+    // Extract the numeric part of the file name (skipping the prefix and one separator)
+    // NOTE(review): when filename is exactly the prefix, length - strlen(prefix) - 1 wraps
+    // around (size_t) and the view extends far past the string.
+    std::string_view lsn_str(filename.data() + strlen(file_prefix_) + 1,
+                             filename.length() - strlen(file_prefix_) - 1);
+    std::from_chars_result result =
+        std::from_chars(lsn_str.data(), lsn_str.data() + lsn_str.size(), fileno);
+    if (result.ec != std::errc()) {
+        LOG_ERROR("invalid log file name: cannot calc file_no. filename=%s, error=%s",
+                  filename.c_str(), strerror(static_cast(result.ec)));
+        return RC::INVALID_ARGUMENT;
+    }
+
+    return RC::SUCCESS;
+}
+
+/**
+ * @brief Register the next numbered binlog file, append it to the index file and open it.
+ * @param file_writer Writer that is opened on the new file.
+ * @return The result of file_writer.open().
+ */
+RC LogFileManager::create_file(BinLogFileWriter &file_writer)
+{
+    // Numbering starts at 1
+    // NOTE(review): log_files_ is read here before log_file_mutex_ is taken below, while
+    // clean_logs() modifies it under that mutex on another thread.
+    uint32_t fileno = log_files_.empty() ? 1 : log_files_.rbegin()->first + 1;
+
+    std::ostringstream oss;
+    oss << std::setw(BINLOG_NAME_WIDTH) << std::setfill('0') << fileno;
+    file_suffix_ = oss.str();
+
+    std::string nextFilename = file_prefix_ + std::string(file_dot_) + file_suffix_;
+    std::filesystem::path next_file_path = directory_ / nextFilename;
+
+    std::lock_guard lock(log_file_mutex_);
+    auto create_time = std::chrono::steady_clock::now();
+    auto timestamp =
+        std::chrono::duration_cast(create_time.time_since_epoch()).count();
+    log_files_.emplace(fileno, std::make_pair(next_file_path, timestamp));
+    // Append to the index file (the return code is ignored)
+    std::string absolute_next_file = next_file_path.string();
+    write_filename2index(absolute_next_file);
+
+    LOG_INFO("[==rotate file==]next file name = %s", next_file_path.c_str());
+
+    last_file_no_.store(fileno, std::memory_order_release); // update the current file number
+    return file_writer.open(next_file_path.c_str(), max_file_size_per_file_);
+}
+
+/**
+ * @brief Rotate to the next binlog file.
+ *
+ * When log_files_ is not empty, a ROTATE event naming the new file is first written to the
+ * current file, which is then flushed and closed. After that the new file is registered,
+ * appended to the index file and opened.
+ *
+ * @param file_writer Writer holding the current file; reopened on the new file.
+ * @return The result of file_writer.open().
+ */
+RC LogFileManager::next_file(BinLogFileWriter &file_writer)
+{
+    // Numbering starts at 1
+    // NOTE(review): as in create_file, log_files_ is read before log_file_mutex_ is taken.
+    uint32_t fileno = log_files_.empty() ? 1 : log_files_.rbegin()->first + 1;
+
+    std::ostringstream oss;
+    oss << std::setw(BINLOG_NAME_WIDTH) << std::setfill('0') << fileno;
+    file_suffix_ = oss.str();
+
+    std::string nextFilename = file_prefix_ + std::string(file_dot_) + file_suffix_;
+    std::filesystem::path next_file_path = directory_ / nextFilename;
+    std::string absolute_next_file = next_file_path.string(); // the absolute path
+
+    if (!log_files_.empty()) {
+        // Write a rotate event into the previous file, then close it
+        // NOTE(review): the original comment calls the last argument "binlog version 4", but
+        // the Rotate_event constructor's fourth parameter is pos_arg -- confirm the intent.
+        auto rotateEvent = std::make_unique(
+            absolute_next_file.c_str(), // absolute path of the file
+            absolute_next_file.length(), // length of the file name
+            Rotate_event::DUP_NAME,
+            4 // binlog version 4
+        );
+
+        // Write the rotate event and flush it to disk
+        // NOTE(review): get_binlog() is dereferenced unchecked; presumably it is null until
+        // the writer has been opened once -- verify the first-call path.
+        file_writer.get_binlog()->write_event_to_binlog(rotateEvent.get());
+        file_writer.get_binlog()->flush();
+
+        file_writer.close();
+    }
+
+    std::lock_guard lock(log_file_mutex_);
+    auto create_time = std::chrono::steady_clock::now();
+    auto timestamp =
+        std::chrono::duration_cast(create_time.time_since_epoch()).count();
+    log_files_.emplace(fileno, std::make_pair(next_file_path, timestamp));
+    // Append to the index file (the return code is ignored)
+    write_filename2index(absolute_next_file); // use the absolute path
+
+    LOG_INFO("[==rotate file==]next file name = %s", next_file_path.c_str());
+
+    last_file_no_.store(fileno, std::memory_order_release); // update the current file number
+    return file_writer.open(next_file_path.c_str(), max_file_size_per_file_);
+}
+
+/**
+ * @brief Append @p filename plus a newline to the index file.
+ * @param filename In/out: a '\n' is appended to the caller's string.
+ * @return RC::SUCCESS, or RC::IOERR_WRITE when fewer bytes than expected were written.
+ */
+RC LogFileManager::write_filename2index(std::string &filename)
+{
+    filename += "\n"; // append a newline
+    ssize_t write_len = write(index_fd_, filename.c_str(), filename.length());
+    if (write_len != static_cast(filename.length())) {
+        LOG_ERROR("Failed to write to index file, expected %zu bytes, wrote %zd "
+                  "bytes, error: %s",
+                  filename.length(), write_len, strerror(errno));
+        return RC::IOERR_WRITE;
+    }
+
+    return RC::SUCCESS;
+}
+
+/**
+ * @brief Overwrite the control file with @p ckp_info (seek to 0, truncate, write, sync).
+ * @return RC::SUCCESS, or RC::IOERR_SEEK / RC::IOERR_WRITE / RC::IOERR_SYNC for the step
+ *         that failed.
+ */
+RC LogFileManager::actual_write_ckp2control(const CkpInfo &ckp_info)
+{
+    std::lock_guard lock(ckp_write_mutex_);
+
+    if (lseek(ckp_fd_, 0, SEEK_SET) == -1) {
+        LOG_ERROR("Failed to seek to beginning of file: %s", strerror(errno));
+        return RC::IOERR_SEEK;
+    }
+
+    if (ftruncate(ckp_fd_, 0) == -1) {
+        LOG_ERROR("Failed to truncate file: %s", strerror(errno));
+        return RC::IOERR_WRITE;
+    }
+
+    // Write the whole struct in one go
+    ssize_t write_len = write(ckp_fd_, &ckp_info, sizeof(CkpInfo));
+    if (write_len != sizeof(CkpInfo)) {
+        LOG_ERROR("Failed to write to control file: %s", strerror(errno));
+        return RC::IOERR_WRITE;
+    }
+
+    // Flush to disk (the original comment says "asynchronous", but fdatasync blocks until
+    // the data has been transferred)
+    if (fdatasync(ckp_fd_) == -1) {
+        LOG_ERROR("Failed to sync file: %s", strerror(errno));
+        return RC::IOERR_SYNC;
+    }
+
+    return RC::SUCCESS;
+}
+
+/**
+ * @brief Cleaner-thread loop: delete expired log files and trim the oldest ones once the
+ *        file count reaches the trigger threshold. Runs until stop_flag_ is set.
+ *
+ * NOTE(review): file ages are computed from a steady_clock reading, and create_file /
+ * next_file store steady_clock-based timestamps, whereas SetBinlogPath stores
+ * get_file_create_time() (stat st_ctime) for files found on disk. These look like
+ * different time bases -- confirm.
+ */
+void LogFileManager::clean_logs()
+{
+    last_expiration_check_ = std::chrono::steady_clock::now();
+
+    while (!stop_flag_) {
+        std::unique_lock lock(log_file_mutex_);
+        // Wake up at least every 2 seconds, or earlier when notified and there is work.
+        cleaner_cv_.wait_for(lock, std::chrono::seconds(2), [this]() {
+            auto now = std::chrono::steady_clock::now();
+            auto duration_since_last_check = now - last_expiration_check_;
+            bool should_check_expiration = duration_since_last_check >= expiration_check_interval_;
+
+            return stop_flag_ || log_files_.size() >= binlog_num_threshold_ * trigger_ratio_ ||
+                   should_check_expiration;
+        });
+
+        if (stop_flag_)
+            break;
+
+        // No files to clean, keep waiting
+        if (log_files_.empty()) {
+            continue;
+        }
+
+        auto now = std::chrono::steady_clock::now();
+        auto duration_since_last_check = now - last_expiration_check_;
+        // Uses `>` here, while the wait predicate above uses `>=`.
+        bool should_check_expiration = (duration_since_last_check > expiration_check_interval_);
+
+        // Record the current check interval
+        // NOTE(review): check_interval_seconds is never used after this point.
+        auto check_interval_seconds =
+            std::chrono::duration_cast(duration_since_last_check).count();
+
+
+
+
+
+        // 1. Check for expired files
+        if (should_check_expiration) {
+            last_expiration_check_ = now;
+            auto now_ts =
+                std::chrono::duration_cast(now.time_since_epoch()).count();
+
+            std::vector>::iterator>
+                expired_files;
+
+            // The last file is never deleted, even if it has reached the expiration time
+            uint32_t last_file_no = log_files_.rbegin()->first;
+
+            for (auto it = log_files_.begin(); it != log_files_.end(); ++it) {
+                // Skip the last file
+                if (it->first == last_file_no) {
+                    continue;
+                }
+
+                auto file_age = now_ts - it->second.second;
+                if (file_age >= expiration_check_interval_.count()) {
+                    LOG_DEBUG("Found expired file: %s, age: %ld seconds", it->second.first.c_str(),
+                              file_age);
+                    expired_files.push_back(it);
+                }
+            }
+
+            if (!expired_files.empty()) {
+                LOG_DEBUG("Cleaning %zu expired log files...", expired_files.size());
+                for (auto it : expired_files) {
+                    std::filesystem::remove(it->second.first);
+                    LOG_DEBUG("Deleted expired file [%s]", it->second.first.c_str());
+                    log_files_.erase(it);
+                }
+            }
+        }
+
+        // 2. Count-based cleanup
+        size_t current_size = log_files_.size();
+        if (current_size >= binlog_num_threshold_ * trigger_ratio_) {
+            size_t num_to_clean = static_cast(current_size * clean_ratio_);
+            if (num_to_clean > 0 && current_size > 1) { // always keep at least one file
+                LOG_DEBUG("Current files: %zu, Cleaning %zu old log files based on count...",
+                          current_size, num_to_clean);
+
+                // Delete from the lowest file number upwards.
+                auto it = log_files_.begin();
+                for (size_t i = 0; i < num_to_clean && log_files_.size() > 1; ++i) {
+                    std::filesystem::remove(it->second.first);
+                    LOG_DEBUG("Deleted [%s] file", it->second.first.c_str());
+                    it = log_files_.erase(it);
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief Drain outstanding work, set stop_flag_ and join the background threads.
+ *        Does nothing when stop_flag_ is already set.
+ *
+ * NOTE(review): thread_pool_ is dereferenced here (and in wait_for_completion) without a
+ * null check, while the destructor guards it with `if (thread_pool_)`. It is assigned in
+ * SetBinlogPath, so destroying a manager on which SetBinlogPath was never called looks
+ * like a null dereference -- confirm.
+ */
+void LogFileManager::shutdown()
+{
+    if (!stop_flag_) {
+        LOG_INFO("Starting shutdown sequence...");
+
+        // 1. First wait for all submitted tasks to finish
+        wait_for_completion();
+
+        log_progress();
+
+        // 2. Set the stop flag so that no new tasks are submitted
+        stop_flag_ = true;
+        LOG_INFO("Stop flag set, no new tasks will be accepted");
+
+        // 3. Wait for the collector thread to finish
+        if (task_collector_thread_.joinable()) {
+            LOG_INFO("Waiting for task collector thread to join");
+            task_collector_thread_.join();
+        }
+
+        // 4. Shut down the thread pool
+        LOG_INFO("Shutting down thread pool");
+        thread_pool_->shutdown();
+        LOG_INFO("Thread pool max size was: %d", thread_pool_->largest_pool_size());
+
+        // 5. Wait for the writer thread to finish
+        if (writer_thread_.joinable()) {
+            LOG_INFO("Waiting for writer thread to join");
+            writer_thread_.join();
+        }
+
+        // 6. Wait for the cleaner thread to finish
+        cleaner_cv_.notify_one();
+        if (cleaner_thread_.joinable()) {
+            cleaner_thread_.join();
+        }
+
+        // 7. Record the total execution time
+        auto endTime = std::chrono::high_resolution_clock::now();
+        auto duration =
+            std::chrono::duration_cast(endTime - start_time_).count();
+        LOG_DEBUG("Total execution time: %ld ms", duration);
+
+        LOG_INFO("All threads joined, final progress:");
+        log_progress();
+    }
+}
+
+/**
+ * @brief Block until all counted tasks are finished, the thread pool has terminated and the
+ *        result queue has no pending results.
+ */
+void LogFileManager::wait_for_completion()
+{
+    LOG_INFO("Waiting for all tasks to complete...");
+    // 1. Wait until all tasks have been enqueued
+    while (finished_tasks_ < total_solve_tasks_) {
+        LOG_INFO("Remaining tasks: %zu", total_solve_tasks_ - finished_tasks_);
+        {
+            std::unique_lock lock(task_mutex_);
+            task_cond_.notify_one(); // wake the processing thread to handle remaining tasks
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS));
+    }
+
+    LOG_INFO("ring buffer is empty...Waiting for all tasks transform...");
+
+    // 2. Wait for the tasks in the thread pool to finish
+    thread_pool_->await_termination();
+
+    log_progress();
+
+    LOG_INFO("transform to buffer is done...Waiting for all ResultBatch to write to "
+             "binlog...");
+
+    // 3. Wait for the write queue to drain
+    {
+        std::unique_lock lock(result_queue_.mutex_);
+        while (!result_queue_.pending_results_.empty()) {
+            result_queue_.cv_.notify_one();
+            result_queue_.cv_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS));
+        }
+    }
+
+    LOG_INFO("All tasks and writes completed.");
+}
+
+/**
+ * @brief Get a timestamp for the file from stat().
+ * @return st.st_ctime, or 0 when stat() fails.
+ *
+ * NOTE(review): the original comment calls this the creation time, but POSIX defines
+ * st_ctime as the time of the last status change -- confirm this is acceptable.
+ */
+long get_file_create_time(const std::filesystem::path &path)
+{
+    struct stat st;
+    if (stat(path.c_str(), &st) != 0) {
+        LOG_ERROR("Failed to get file stats for %s", path.c_str());
+        return 0;
+    }
+    return static_cast(st.st_ctime);
+}
+
+RC LogFileManager::SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum,
+                                 long capacity, int expirationTime)
+{
+    if (length >= FN_REFLEN) {
+        LOG_ERROR("dir_path invalid length, should be less than 512 byte. length=%d", length);
+        return RC::INVALID_ARGUMENT;
+    }
+
+    // Validate the expirationTime parameter
+    if (expirationTime <= 0) {
+        LOG_ERROR("Invalid expiration time: %d, using default value", expirationTime);
+        expirationTime = DEFAULT_EXPIRATION_TIME;
+    }
+
+    // Copy exactly the first `length` characters of bashPathBytes so that any trailing bytes cannot pollute the path
+    std::string bash_str(bashPathBytes, length);
+    directory_ = std::filesystem::absolute(std::filesystem::path(bash_str));
+    file_prefix_ = DEFAULT_BINLOG_FILE_NAME_PREFIX;
+    max_file_size_per_file_ = maxSize;
+
+    transform_max_thread_num_ = threadNum;
+    // Initialize the thread pool; once task_collector_thread_ has a batch ready it is submitted to the pool
+    thread_pool_ = std::make_unique();
+    if (RC::SUCCESS != thread_pool_->init(THREAD_POOL_NAME, CORE_THREAD_NUM, transform_max_thread_num_,
+                                          THRANSFORM_THREAD_ALIVE_MS)) {
+        LOG_ERROR("Failed to init thread pool");
+        return RC::INVALID_ARGUMENT;
+    }
+
+    binlog_num_threshold_ = capacity / maxSize;
+    expiration_check_interval_ = std::chrono::seconds(expirationTime);
+
+    // Check whether the directory exists; create it if it does not
+    if (!std::filesystem::is_directory(directory_)) {
+        LOG_INFO("directory is not exist.
directory=%s", directory_.c_str()); + + std::error_code ec; + bool ret = std::filesystem::create_directories(directory_, ec); + if (!ret) { + LOG_ERROR("create directory failed. directory=%s, error=%s", directory_.c_str(), + ec.message().c_str()); + return RC::FILE_CREATE; + } + } + + // 如果当前目录下有文件,则会继续从下一个文件命名开始写,预先列出所有的日志文件到 log_files_里 + for (const std::filesystem::directory_entry &dir_entry : + std::filesystem::directory_iterator(directory_)) { + if (!dir_entry.is_regular_file()) { + continue; + } + + std::string filename = dir_entry.path().filename().string(); + + uint32_t fileno = 0; + RC rc = get_fileno_from_filename(filename, fileno); + if (LOFT_FAIL(rc)) { + LOG_INFO("invalid log file name. filename=%s", filename.c_str()); + continue; + } + + if (log_files_.find(fileno) != log_files_.end()) { + LOG_INFO("duplicate log file. filename1=%s, filename2=%s", filename.c_str(), + log_files_.find(fileno)->second.filename().c_str()); + continue; + } + long create_time = get_file_create_time(dir_entry.path()); + log_files_.emplace(fileno, std::make_pair(dir_entry.path(), create_time)); + } + + LOG_INFO("init log file manager success. directory=%s, log files=%d", directory_.c_str(), + static_cast(log_files_.size())); + + // 获得索引文件 句柄 + std::filesystem::path index_path = directory_ / (file_prefix_ + index_suffix_); + index_path_storage = index_path.string(); // 先保存到静态存储 + default_binlog_index_file_name_ = index_path_storage.c_str(); // 使用静态存储的字符串 + + // 打开文件,使用读写模式,如果文件不存在则创建,以追加模式打开,文件权限为0644 + index_fd_ = ::open(index_path.c_str(), O_RDWR | O_CREAT | O_APPEND, 0644); + if (index_fd_ < 0) { + LOG_ERROR("open file failed. 
filename=%s, error=%s", index_path.c_str(), strerror(errno)); + return RC::IOERR_OPEN; + } + + std::filesystem::path control_path = directory_ / (file_prefix_ + control_file_suffix_); + + // 检查control文件是否存在 + if (std::filesystem::exists(control_path)) { + // 读取control文件内容 + std::string ckp = ""; + std::string target_filename = ""; + uint64 offset = 0; + + RC ret = read_from_ckp_file_content(control_path, ckp, target_filename, offset); + if (LOFT_FAIL(ret)) { + LOG_ERROR("Failed to read control file %s: %s", control_path.c_str(), strerror(errno)); + return ret; + } + if (target_filename.empty()) { + LOG_DEBUG("last binlog file is empty...waiting for revoking api2"); + } else { + // 检查目标文件是否存在 + std::filesystem::path target_path = directory_ / target_filename; + if (!std::filesystem::exists(target_path)) { + LOG_ERROR("Target file %s does not exist, skipping truncation", target_filename.c_str()); + return RC::FILE_NOT_EXIST; + } else { + // 打开目标文件 + int target_fd = ::open(target_path.c_str(), O_RDWR); + if (target_fd >= 0) { + // 截断文件到指定位置 + if (ftruncate(target_fd, offset) == 0) { + LOG_INFO("Successfully truncated file %s to offset %lu", + target_filename.c_str(), offset); + } else { + LOG_ERROR("Failed to truncate file %s: %s", target_filename.c_str(), + strerror(errno)); + } + close(target_fd); + } else { + LOG_ERROR("Failed to open target file %s: %s", target_filename.c_str(), + strerror(errno)); + } + } + } + } + + ckp_fd_ = ::open(control_path.c_str(), O_RDWR | O_CREAT, 0644); + if (ckp_fd_ < 0) { + LOG_ERROR("open file failed. 
filename=%s, error=%s", control_path.c_str(), strerror(errno)); + return RC::IOERR_OPEN; + } + // 每调用一次 SetBinlogPath 接口就重新打开一个新的文件 + return create_file(*get_file_writer()); +} + +/** + * @brief 异步调用,移动拷贝数据,批处理,当达到 Batch_SIZE 时,就被丢进 + * 任务队列里给 消费者线程去执行 + * @param buf + * @param is_ddl + * @return + */ +std::future LogFileManager::ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl) +{ + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + try { + total_solve_tasks_++; // 增加待处理任务计数 + Task task(fbStr, length, is_ddl); + + if (!ring_buffer_->write(std::move(task))) { + total_solve_tasks_--; // 写入失败时需要减少计数 + promise->set_value(RC::SPEED_LIMIT); + return future; + } + + auto current_status = global_runtime_status_.load(std::memory_order_relaxed); + if (current_status != RC::SUCCESS) { + promise->set_value(current_status); + } else { + promise->set_value(RC::SUCCESS); + } + } catch (const std::exception &e) { + total_solve_tasks_--; // 异常时也需要减少计数 + promise->set_exception(std::current_exception()); + } + + return future; +} + +void LogFileManager::process_tasks() +{ + while (!stop_flag_) { + std::vector batch_tasks; + batch_tasks.reserve(BATCH_SIZE); + + { + std::unique_lock lock(task_mutex_); + task_cond_.wait_for(lock, std::chrono::milliseconds(THREAD_WAIT_TIMEOUT_MS), [this] { + return stop_flag_ || ring_buffer_->get_task_count_blocking() > 0; + }); + + if (stop_flag_ && ring_buffer_->get_task_count_blocking() == 0) + break; + + size_t tasks_to_read = std::min(ring_buffer_->get_task_count_blocking(), BATCH_SIZE); + for (size_t i = 0; i < tasks_to_read; ++i) { + Task task; + if (ring_buffer_->read(task)) { + batch_tasks.push_back(std::move(task)); + } + } + + if (!batch_tasks.empty()) { + auto processor = std::make_shared(this, std::move(batch_tasks), + batch_sequence_++); + thread_pool_->execute([processor] { processor->run(); }); + } + } + } +} + +RC LogFileManager::ConvertToBinlog(char *jsonStrBytes, int length) +{ + return 
RC::UNIMPLEMENTED; +} + +RC LogFileManager::GetLastScnAndSeq(long &scn, long &seq, std::string &ckp) +{ + std::filesystem::path control_path = directory_ / (file_prefix_ + control_file_suffix_); + + // 读文件 + { + std::lock_guard lock(ckp_write_mutex_); + // 检查control文件是否存在 + if (std::filesystem::exists(control_path)) { + CkpInfo ckp_info; + + int fd = open(control_path.c_str(), O_RDONLY); + if (fd == -1) { + LOG_ERROR("Failed to open control file %s: %s", control_path.c_str(), strerror(errno)); + return RC::IOERR_OPEN; + } + + // 读取整个结构体 + ssize_t read_len = read(fd, &ckp_info, sizeof(CkpInfo)); + close(fd); + + if (read_len != sizeof(CkpInfo)) { + // 首次调用入湖,control里没内容,直接跳过处理 + if (read_len == 0) { + LOG_DEBUG("revoke api3, control file is empty now...waiting for revoking api2"); + return RC::SUCCESS; + } else { // 可能被恶意损坏 + LOG_ERROR("Failed to read control file %s: %s, may be corrupt", control_path.c_str(), strerror(errno)); + return RC::ControlFile_Corrupt; + } + } + + // 直接从结构体中获取值 + seq = ckp_info.seq; + scn = ckp_info.scn; + ckp = std::string(ckp_info.checkpoint); + + } else { + LOG_ERROR("control file not exits"); + return RC::FILE_NOT_EXIST; + } + } + + return RC::SUCCESS; +} + +std::unique_ptr g_log_file_manager; + +/****************************************************************************** + 对外暴露 API +******************************************************************************/ +RC SetBinlogPath(char *bashPathBytes, int length, long maxSize, int threadNum, long capacity, + int expirationTime) +{ + if (!g_log_file_manager) { + g_log_file_manager = std::make_unique(); + } + return g_log_file_manager->SetBinlogPath(bashPathBytes, length, maxSize, threadNum, capacity, + expirationTime); +} + +RC GetLastScnAndSeq(long *scn, long *seq, char *ckp) +{ + std::string ckp_str = ""; + RC rc = g_log_file_manager->GetLastScnAndSeq(*scn, *seq, ckp_str); + + if (rc == RC::SUCCESS && ckp != nullptr) { + strncpy(ckp, ckp_str.c_str(), ckp_str.size() + 1); + } + return 
rc; +} + +RC ConvertToBinlog(char *jsonStrBytes, int length) +{ + return g_log_file_manager->ConvertToBinlog(jsonStrBytes, length); +} + +RC ConvertFlatBufferToBinlog(char *fbStr, int length, bool is_ddl) +{ + return (g_log_file_manager->ConvertFlatBufferToBinlog(fbStr, length, is_ddl)).get(); +} diff --git a/binlogconvert/src/sql/mysql_fields.cpp b/binlogconvert/src/sql/mysql_fields.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b17663e61299ef0129932dcc8f61e31cb4280298 --- /dev/null +++ b/binlogconvert/src/sql/mysql_fields.cpp @@ -0,0 +1,414 @@ +// +// Created by Coonger on 2024/10/28. +// +#include "sql/mysql_fields.h" + +#include + +#include "common/logging.h" +#include "utils/little_endian.h" + +namespace mysql { + +inline uint my_decimal_length_to_precision(uint length, uint scale, bool unsigned_flag) +{ + if (!length && scale) { + LOG_ERROR("Precision can't be negative thus ignore unsigned_flag when length is 0."); + return 0; + } + uint retval = (uint)(length - (scale > 0 ? 1 : 0) - (unsigned_flag || !length ? 0 : 1)); + return retval; +} + +/// This is used as a table name when the table structure is not set up +Field::Field(uint32 length_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg) + : field_name(field_name_arg) + , m_null(is_nullable_arg) + , field_length(length_arg) +{ + if (!is_nullable()) { + set_flag(NOT_NULL_FLAG); + } +} + +/** + Numeric fields base class constructor. 
+*/ +Field_num::Field_num(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg, uint8_t dec_arg, bool unsigned_arg) + : Field(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + , unsigned_flag(unsigned_arg) + , dec(dec_arg) +{ + if (unsigned_flag) { + set_flag(UNSIGNED_FLAG); + } +} + +/****************************************************************************** + Field_new_decimal +******************************************************************************/ +Field_new_decimal::Field_new_decimal(uint32_t len_arg, bool is_nullable_arg, + unsigned char null_bit_arg, const char *name, uint8_t dec_arg, + bool unsigned_arg) + : Field_num(len_arg, is_nullable_arg, null_bit_arg, name, dec_arg, unsigned_arg) +{ + precision = std::min(len_arg, uint(DECIMAL_MAX_PRECISION)); +} + +// 精度存在第一个 byte 中,小数位存在 第二个 byte 中 +int Field_new_decimal::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = precision; + *(metadata_ptr + 1) = decimals(); + return 2; +} + +/****************************************************************************** + Field_float +******************************************************************************/ + +int Field_float::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length(); + return 1; +} + +/****************************************************************************** + Field_double +******************************************************************************/ + +int Field_double::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length(); + return 1; +} + +/****************************************************************************** + Field_string +******************************************************************************/ +Field_str::Field_str(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + const char *field_name_arg) + : Field(len_arg, is_nullable_arg, 
null_bit_arg, field_name_arg) +{ + // 默认的是 MY_CS_PRIMARY + set_flag(BINARY_FLAG); +} + +int Field_string::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + if (field_length >= 1024) { + LOG_ERROR("field_length exceeds the maximum allowed value: 1024"); + return -1; + } + if ((real_type() & 0xF0) != 0xF0) { + LOG_ERROR("CHAR real_type does not match the expected value: %u", real_type()); + return -1; + } + LOG_INFO("field_length: %u, real_type: %u", field_length, real_type()); + *metadata_ptr = (real_type() ^ ((field_length & 0x300) >> 4)); // fe + *(metadata_ptr + 1) = (field_length) & 0xFF; // + return 2; +} + +/****************************************************************************** + Field_varstring +******************************************************************************/ +Field_varstring::Field_varstring(uint32_t len_arg, uint length_bytes_arg, bool is_nullable_arg, + uchar null_bit_arg, const char *field_name_arg) + : Field_longstr(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + , length_bytes(len_arg < 256 ? 
1 : 2) +{ + // Table_SHARE 是用来统计 表中的字段信息 +} + +int Field_varstring::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + if (field_length > 65535) { + LOG_ERROR("VARCHAR field length must less than 65535."); + return -1; + } + int2store(metadata_ptr, field_length); + return 2; +} + +/****************************************************************************** + Field_blob +******************************************************************************/ + +int Field_blob::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = pack_length_no_ptr(); + LOG_INFO("metadata: %u (pack_length_no_ptr)", *metadata_ptr); + return 1; +} + +int Field_json::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = 4; + return 1; +} + +/****************************************************************************** + Field_enum +******************************************************************************/ + +int Field_enum::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + *metadata_ptr = real_type(); + *(metadata_ptr + 1) = pack_length(); + return 2; +} + +/****************************************************************************** + Field_bit +******************************************************************************/ + +Field_bit::Field_bit(uint32_t len_arg, bool is_nullable_arg, unsigned char null_bit_arg, + unsigned char bit_ofs_arg, const char *field_name_arg) + : Field(len_arg, is_nullable_arg, null_bit_arg, field_name_arg) + , bit_ofs(bit_ofs_arg) + , bit_len(len_arg & 7) + , bytes_in_rec(len_arg / 8) +{ + LOG_INFO("len_arg: %u, bit_len: " + "%u, bytes_in_rec: %u", + len_arg, bit_len, bytes_in_rec); + + set_flag(UNSIGNED_FLAG); + + if (!m_null) { + null_bit = bit_ofs_arg; + } +} + +int Field_bit::do_save_field_metadata(unsigned char *metadata_ptr) const +{ + LOG_INFO("bit_len: %d, bytes_in_rec: %d", bit_len, bytes_in_rec); + /* + Since this class and Field_bit_as_char have different ideas of + 
what should be stored here, we compute the values of the metadata + explicitly using the field_length. + */ + metadata_ptr[0] = field_length % 8; + metadata_ptr[1] = field_length / 8; + return 2; +} + +auto make_field(const char *field_name, size_t field_length, bool is_unsigned, bool is_nullable, + size_t null_bit, enum_field_types field_type, int interval_count, uint decimals) -> FieldRef +{ + uchar bit_offset = 0; + + if (field_type == MYSQL_TYPE_BIT) { + bit_offset = null_bit; + if (is_nullable) // if null field + { + bit_offset = (bit_offset + 1) & 7; + } + } + + if (!is_nullable) { + null_bit = 0; + } else { + null_bit = ((uchar)1) << null_bit; + } + + switch (field_type) { + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + return std::make_shared(field_length, is_nullable, null_bit, field_name); + case MYSQL_TYPE_VARCHAR: + return std::make_shared(field_length, + HA_VARCHAR_PACKLENGTH(field_length), + is_nullable, null_bit, field_name); + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_LONG_BLOB: { + uint pack_length = + calc_pack_length(field_type, field_length) - portable_sizeof_char_ptr; + + switch (pack_length) { + case 1: + field_length = 255; + break; + case 2: + field_length = 65535; + break; + case 3: + field_length = 16777215; + break; + case 4: + field_length = 4294967295; + break; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, + true); + } + case MYSQL_TYPE_JSON: { + uint pack_length = + calc_pack_length(field_type, field_length) - portable_sizeof_char_ptr; + + return std::make_shared(field_length, is_nullable, null_bit, field_name, + pack_length); + } + case MYSQL_TYPE_ENUM: + if (interval_count == 0) { + LOG_ERROR("ENUM field shouldn't be empty"); + return nullptr; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, + get_enum_pack_length(interval_count)); + case MYSQL_TYPE_SET: + if (interval_count == 0) { + LOG_ERROR("SET field 
shouldn't be empty"); + return nullptr; + } + return std::make_shared(field_length, is_nullable, null_bit, field_name, + get_set_pack_length(interval_count)); + case MYSQL_TYPE_DECIMAL: // never + return std::make_shared(field_length, is_nullable, null_bit, field_name, + decimals, is_unsigned); + case MYSQL_TYPE_NEWDECIMAL: + return std::make_shared(field_length, is_nullable, null_bit, + field_name, decimals, is_unsigned); + case MYSQL_TYPE_FLOAT: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + decimals, is_unsigned); + case MYSQL_TYPE_DOUBLE: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + decimals, is_unsigned); + case MYSQL_TYPE_TINY: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + is_unsigned); + case MYSQL_TYPE_SHORT: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + is_unsigned); + case MYSQL_TYPE_INT24: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + is_unsigned); + case MYSQL_TYPE_LONG: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + is_unsigned); + case MYSQL_TYPE_LONGLONG: + return std::make_shared(field_length, is_nullable, null_bit, field_name, + is_unsigned); + case MYSQL_TYPE_YEAR: + return std::make_shared(is_nullable, null_bit, field_name); + case MYSQL_TYPE_TIMESTAMP2: + return std::make_shared(is_nullable, null_bit, field_name, + field_length); + case MYSQL_TYPE_DATE: + return std::make_shared(is_nullable, null_bit, field_name); + case MYSQL_TYPE_TIME: + return std::make_shared(is_nullable, null_bit, field_name, field_length); + case MYSQL_TYPE_DATETIME: + return std::make_shared(is_nullable, null_bit, field_name, + field_length); + case MYSQL_TYPE_BIT: + return std::make_shared(field_length, is_nullable, null_bit, bit_offset, + field_name); + case MYSQL_TYPE_INVALID: + case MYSQL_TYPE_BOOL: + case MYSQL_TYPE_TIMESTAMP: + case MYSQL_TYPE_TIME2: + case MYSQL_TYPE_DATETIME2: + 
case MYSQL_TYPE_NEWDATE: + LOG_INFO("Field type %d not impl, refer to enum_field_types.h status " + "code", + field_type); + default: + break; + } + return nullptr; +} + +enum_field_types get_blob_type_from_length(size_t length) +{ + enum_field_types type; + if (length < 256) { + type = MYSQL_TYPE_TINY_BLOB; + } else if (length < 65536) { + type = MYSQL_TYPE_BLOB; + } else if (length < 256L * 256L * 256L) { + type = MYSQL_TYPE_MEDIUM_BLOB; + } else { + type = MYSQL_TYPE_LONG_BLOB; + } + return type; +} + +size_t calc_pack_length(enum_field_types type, size_t length) +{ + switch (type) { + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_DECIMAL: + return (length); + case MYSQL_TYPE_VARCHAR: + return (length + (length < 256 ? 1 : 2)); + case MYSQL_TYPE_BOOL: + case MYSQL_TYPE_YEAR: + case MYSQL_TYPE_TINY: + return 1; + case MYSQL_TYPE_SHORT: + return 2; + case MYSQL_TYPE_INT24: + case MYSQL_TYPE_NEWDATE: + return 3; + case MYSQL_TYPE_TIME: + return 3; + case MYSQL_TYPE_TIME2: + return length > MAX_TIME_WIDTH ? my_time_binary_length(length - MAX_TIME_WIDTH - 1) : 3; + case MYSQL_TYPE_TIMESTAMP: + return 4; + case MYSQL_TYPE_TIMESTAMP2: + return length > MAX_DATETIME_WIDTH + ? my_timestamp_binary_length(length - MAX_DATETIME_WIDTH - 1) + : 4; + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_LONG: + return 4; + case MYSQL_TYPE_FLOAT: + return sizeof(float); + case MYSQL_TYPE_DOUBLE: + return sizeof(double); + case MYSQL_TYPE_DATETIME: + return 8; + case MYSQL_TYPE_DATETIME2: + return length > MAX_DATETIME_WIDTH + ? 
my_datetime_binary_length(length - MAX_DATETIME_WIDTH - 1) + : 5; + case MYSQL_TYPE_LONGLONG: + return 8; /* Don't crash if no longlong */ + case MYSQL_TYPE_NULL: + return 0; + case MYSQL_TYPE_TINY_BLOB: + return 1 + portable_sizeof_char_ptr; + case MYSQL_TYPE_BLOB: + return 2 + portable_sizeof_char_ptr; + case MYSQL_TYPE_MEDIUM_BLOB: + return 3 + portable_sizeof_char_ptr; + case MYSQL_TYPE_LONG_BLOB: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_GEOMETRY: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_JSON: + return 4 + portable_sizeof_char_ptr; + case MYSQL_TYPE_SET: + case MYSQL_TYPE_ENUM: + case MYSQL_TYPE_NEWDECIMAL: + return 0; // This shouldn't happen + case MYSQL_TYPE_BIT: + return length / 8; + case MYSQL_TYPE_INVALID: + case MYSQL_TYPE_TYPED_ARRAY: + break; + } + LOG_ERROR("invalid field type"); + return 0; +} + +} // namespace mysql diff --git a/binlogconvert/src/transform_manager.cpp b/binlogconvert/src/transform_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..af74cb933ce5abe17f564dcbc459aec05ca646a2 --- /dev/null +++ b/binlogconvert/src/transform_manager.cpp @@ -0,0 +1,381 @@ +#include "transform_manager.h" +#include "binlog.h" + +#include "format/ddl_generated.h" +#include "format/dml_generated.h" + +#include "events/control_events.h" +#include "events/rows_event.h" +#include "events/statement_events.h" +#include "events/write_event.h" + +#include "sql/mysql_fields.h" +#include "utils/base64.h" + +#include "common/logging.h" +#include "common/macros.h" +#include "data_handler.h" + +#include +#include +#include + +#include +#include +#include +#include + +inline uint64_t LogFormatTransformManager::stringToTimestamp(const std::string &timeString) +{ + std::tm timeStruct = {}; + + const char *str = timeString.c_str(); + const char *p = str; + + // 直接解析年月日时分秒 + timeStruct.tm_year = + (p[0] - '0') * 1000 + (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0') - 1900; + timeStruct.tm_mon = (p[5] - '0') * 
10 + (p[6] - '0') - 1; + timeStruct.tm_mday = (p[8] - '0') * 10 + (p[9] - '0'); + timeStruct.tm_hour = (p[11] - '0') * 10 + (p[12] - '0'); + timeStruct.tm_min = (p[14] - '0') * 10 + (p[15] - '0'); + timeStruct.tm_sec = (p[17] - '0') * 10 + (p[18] - '0'); + + // 检查格式是否正确 + if (p[4] != '-' || p[7] != '-' || p[10] != ' ' || p[13] != ':' || p[16] != ':') { + LOG_ERROR("Invalid time format"); + return -1; + } + + // 解析微秒部分 + int microseconds = 0; + if (timeString.length() > 19 && p[19] == '.') { + p += 20; // 移到小数点后第一位 + int multiplier = 100000; // 从最高位开始 + while (*p >= '0' && *p <= '9' && multiplier > 0) { + microseconds += (*p - '0') * multiplier; + multiplier /= 10; + ++p; + } + } + + time_t timeEpoch = timegm(&timeStruct); + auto timeSinceEpoch = std::chrono::system_clock::from_time_t(timeEpoch); + + // 减去8小时偏移 + timeSinceEpoch -= std::chrono::hours(8); + + // 添加微秒 + auto timePointWithMicroseconds = timeSinceEpoch + std::chrono::microseconds(microseconds); + + return std::chrono::duration_cast( + timePointWithMicroseconds.time_since_epoch()) + .count(); +} + +inline enum_field_types LogFormatTransformManager::ConvertStringType(std::string_view type_str) +{ + auto it = type_map.find(type_str); + + if (it != type_map.end()) { + return it->second; + } else { + return MYSQL_TYPE_INVALID; + } +} + +RC LogFormatTransformManager::processRowData( + const ::flatbuffers::Vector<::flatbuffers::Offset> &data, Rows_event *row, + const std::unordered_map &field_map, + const std::vector &field_vec, bool is_before) +{ + RC rc = RC::SUCCESS; + row->setBefore(is_before); + + // 使用数组来收集数据,下标对应field的序号 + const size_t max_field_size = field_vec.size() + 1; // +1因为field_idx从1开始 + std::vector field_present(max_field_size, 0); + std::vector rows; + std::vector rows_null; + std::vector ordered_data(max_field_size, nullptr); + + // 第一次遍历:将数据放入对应位置 + for (size_t i = 0; i < data.size(); ++i) { + auto item = data[i]; + if (item == nullptr || item->key() == nullptr) { + LOG_ERROR("kvPair or key 
is null"); + return RC::INVALID_ARGUMENT; + } + int field_idx = field_map.at(item->key()->c_str()); + field_present[field_idx] = 1; + ordered_data[field_idx] = item; + } + + // 收集实际存在的字段数据 + rows.reserve(data.size()); + rows_null.reserve(data.size()); + + // 按顺序处理存在的字段 + for (size_t field_idx = 1; field_idx < max_field_size; ++field_idx) { + if (field_present[field_idx]) { + auto item = ordered_data[field_idx]; + rows.push_back(field_idx); + bool is_null = (item->value_type() == DataMeta_NONE); + rows_null.push_back(is_null ? 1 : 0); + } + } + + // 设置rows和rows_null + if (is_before) { + row->set_rows_before(std::move(rows)); + row->set_null_before(std::move(rows_null)); + } else { + row->set_rows_after(std::move(rows)); + row->set_null_after(std::move(rows_null)); + } + + // 按顺序处理非空数据 + for (size_t field_idx = 1; field_idx < max_field_size; ++field_idx) { + if (field_present[field_idx]) { + auto item = ordered_data[field_idx]; + if (item->value_type() != DataMeta_NONE) { + if (auto handler = getHandler(item->value_type())) { + rc = handler->processData(item, field_vec[field_idx - 1].get(), row); + if (rc != RC::SUCCESS) { + LOG_ERROR("processData failed: %s", strrc(rc)); + return rc; + } + } + } + } + } + + return rc; +} + +RC LogFormatTransformManager::transformDDL(const DDL *ddl, + std::vector> &events) +{ + RC rc = RC::SUCCESS; + auto ddlType = ddl->ddl_type(); + + auto dbName = ddl->db_name(); + auto ddlSql = ddl->ddl_sql(); + + auto immediateCommitTs = ddl->msg_time(); + auto originalCommitTs = ddl->tx_time(); + + // 1. 
构造 GTID event + auto lastCommit = ddl->last_commit(); + auto txSeq = ddl->tx_seq(); + + uint64 i_ts, o_ts; + + i_ts = stringToTimestamp(immediateCommitTs->c_str()); + o_ts = stringToTimestamp(originalCommitTs->c_str()); + if (i_ts == -1 || o_ts == -1) { + LOG_ERROR("Invalid time format"); + return RC::INVALID_ARGUMENT; + } + + std::unique_ptr gtidEvent = std::make_unique( + lastCommit, txSeq, true, o_ts, i_ts, ORIGINAL_SERVER_VERSION, IMMEDIATE_SERVER_VERSION); + + // 2. 构造 Query event + const char *query_arg = ddlSql->data(); + const char *catalog_arg = nullptr; + const char *db_arg = nullptr; + if (dbName != nullptr) { + db_arg = dbName->c_str(); + } + catalog_arg = "std"; // binlog v4里,catalog_name 是固定的 + + uint32_t query_length = strlen(query_arg); + LOG_INFO("query_: %s, query_len: %d", query_arg, query_length); + + uint64 thread_id_arg = THREAD_ID; + int errcode = ERROR_CODE; + + auto queryEvent = std::make_unique(query_arg, catalog_arg, db_arg, txSeq, + query_length, thread_id_arg, errcode, o_ts); + + if (ddlType == nullptr) { // drop db + LOG_INFO("sql_type: drop db | create/drop procedure/function"); + } else { + std::string sql_type = ddlType->c_str(); + } + + events.push_back(std::move(gtidEvent)); + events.push_back(std::move(queryEvent)); + return rc; +} + +RC LogFormatTransformManager::transformDML(const DML *dml, + std::vector> &events) +{ + RC rc = RC::SUCCESS; + auto lastCommit = dml->last_commit(); + auto txSeq = dml->tx_seq(); + auto immediateCommitTs = dml->msg_time(); + auto originalCommitTs = dml->tx_time(); + + uint64 i_ts, o_ts; + + i_ts = stringToTimestamp(immediateCommitTs->c_str()); + o_ts = stringToTimestamp(originalCommitTs->c_str()); + if (i_ts == -1 || o_ts == -1) { + LOG_ERROR("Invalid time format"); + return RC::INVALID_ARGUMENT; + } + + auto ge = std::make_unique(lastCommit, txSeq, true, o_ts, i_ts, + ORIGINAL_SERVER_VERSION, IMMEDIATE_SERVER_VERSION); + + //////////****************** gtid event end ******************************* 
+ + //////////****************** query event start **************************** + const char *query_arg = DML_QUERY_STR; // row-based 的 DML 固定内容是 BEGIN + auto dbName = dml->db_name(); + const char *db_arg = dbName->c_str(); + const char *catalog_arg = "std"; // 在binlog v4中,目录名称通常被设置为 "std" + + uint32 query_length = strlen(query_arg); + LOG_INFO("query_: %s, query_len: %d", query_arg, query_length); + + uint64 thread_id_arg = THREAD_ID; + int errcode = 0; + + auto qe = std::make_unique(query_arg, catalog_arg, db_arg, INVALID_XID, + query_length, thread_id_arg, errcode, o_ts); + //////////****************** query event end ****************************** + + //////////****************** table map event start ************************ + + auto table = dml->table_(); + const char *tbl_arg = table->c_str(); + auto fields = dml->fields(); + + std::unordered_map field_map; // [field_name, field_idx] + std::vector field_vec; + size_t null_bit = 0; + + int interval_count = 0; + int fieldIdx = 0; // 下标 + for (auto field : *fields) { + auto field_name = field->name(); + auto fieldMeta = field->meta(); + auto field_length = fieldMeta->length(); + bool is_unsigned = fieldMeta->is_unsigned(); + bool is_nullable = fieldMeta->nullable(); + auto decimals = fieldMeta->precision(); + + // 非字符串的类型也有 cs name,为 null + const char* csname_str = "binary"; // 默认值 + if (fieldMeta->csname() != nullptr) { + csname_str = fieldMeta->csname()->c_str(); + } + + auto data_type = fieldMeta->data_type(); + enum_field_types field_type; + if (!data_type) { + // 对于大文本字段,如果 dataType 为 null,默认使用 VARCHAR [FIXME?] 
+ // field_type = MYSQL_TYPE_VARCHAR; + LOG_ERROR("Field %d '%s' has null data type", fieldIdx + 1, field_name->c_str()); + return RC::INVALID_ARGUMENT; + } else { + field_type = ConvertStringType(data_type->c_str()); + } + + if (field_type == MYSQL_TYPE_INVALID) { + LOG_ERROR("field type not supported"); + return RC::FIELD_TYPE_UNSUPPORTED; + } + + if (field_type == MYSQL_TYPE_ENUM || field_type == MYSQL_TYPE_SET) { + interval_count = field_length; + } + if (is_nullable) { + null_bit = fieldIdx; + } + + if ((field_type == MYSQL_TYPE_STRING || field_type == MYSQL_TYPE_VARCHAR)) { + auto cs_it = charset_multiplier.find(csname_str); + if (cs_it != charset_multiplier.end()) { + field_length *= cs_it->second; + } else { + LOG_ERROR("charset %s not supported", csname_str); + return RC::FIELD_CS_UNSUPPORTED; + } + } + + // 工厂函数 + auto field_obj = + mysql::make_field(field_name->c_str(), field_length, is_unsigned, is_nullable, null_bit, + field_type, interval_count, decimals); + if (!field_obj) { + LOG_ERROR(" column %d args may be wrong", fieldIdx + 1); + return RC::INVALID_ARGUMENT; + } + field_vec.emplace_back(field_obj); + field_map.insert({field_name->c_str(), ++fieldIdx}); + } + // TODO 需要根据 create table 时,记录 table_id, 这是全局的, + // 但table_id只是个db运行时的 table_map_event 和 row_event 的对应 + Table_id tid(DML_TABLE_ID); // 暂时随便写一个,实际上要做一个 连续的 id 分配器 + unsigned long colcnt = field_vec.size(); + // field_vec 内部的元素是共享的 + auto table_map_event = std::make_unique( + tid, colcnt, db_arg, strlen(db_arg), tbl_arg, strlen(tbl_arg), field_vec, o_ts); + + LOG_INFO("construct table map event end..."); + + //////////****************** table map event end ************************* + + //////////****************** rows event start **************************** + + auto opType = dml->op_type(); + Log_event_type rows_type = UNKNOWN_EVENT; + if (strcmp(opType->c_str(), "I") == 0) { + rows_type = Log_event_type::WRITE_ROWS_EVENT; + } else if (strcmp(opType->c_str(), "U") == 0) { + rows_type = 
Log_event_type::UPDATE_ROWS_EVENT; + } else if (strcmp(opType->c_str(), "D") == 0) { + rows_type = Log_event_type::DELETE_ROWS_EVENT; + } else { + LOG_ERROR("unknown opType: %s", opType->c_str()); + } + + auto row = std::make_unique(tid, colcnt, 1, rows_type, + o_ts); // 初始化 一个 rows_event 对象 + + if (auto keys = dml->keys()) { + rc = processRowData(*keys, row.get(), field_map, field_vec, true); + if (LOFT_FAIL(rc)) { + LOG_ERROR("process keys failed"); + return rc; + } + } + auto newData = dml->new_data(); + if (newData) { + rc = processRowData(*newData, row.get(), field_map, field_vec, false); + if (LOFT_FAIL(rc)) { + LOG_ERROR("process newData failed"); + return rc; + } + } + + //////////****************** rows event end **************************** + + //////////****************** xid event start ****************************** + + auto xe = std::make_unique(txSeq, o_ts); + LOG_INFO("construct xid event end..."); + + //////////****************** xid event end ****************************** + events.push_back(std::move(ge)); + events.push_back(std::move(qe)); + events.push_back(std::move(table_map_event)); + events.push_back(std::move(row)); + events.push_back(std::move(xe)); + return rc; +} diff --git a/binlogconvert/src/utils/decimal.cpp b/binlogconvert/src/utils/decimal.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0129640d72af59ac8ae4a84d701e79aa52f7c46e --- /dev/null +++ b/binlogconvert/src/utils/decimal.cpp @@ -0,0 +1,988 @@ +/* Copyright (c) 2004, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. 
The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + Without limiting anything contained in the foregoing, this file, + which is part of C Driver for MySQL (Connector/C), is also subject to the + Universal FOSS Exception, version 1.0, a copy of which can be found at + http://oss.oracle.com/licenses/universal-foss-exception. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: strings/decimal.cc +#include "utils/decimal.h" +#include "utils/little_endian.h" +#include "common/macros.h" +#include + +#define MAX_NEGATIVE_NUMBER ((ulonglong)0x8000000000000000LL) +#define INIT_CNT 9 +#define LFACTOR 1000000000ULL +#define LFACTOR1 10000000000ULL +#define LFACTOR2 100000000000ULL +#define DIG_PER_DEC1 9 +#define DIG_MASK 100000000 +#define DIG_BASE 1000000000 +#define DIG_MAX (DIG_BASE - 1) +#define ROUND_UP(X) (((X) + DIG_PER_DEC1 - 1) / DIG_PER_DEC1) +#define ADD(to, from1, from2, carry) /* assume carry <= 1 */ \ + do { \ + dec1 a = (from1) + (from2) + (carry); \ + assert((carry) <= 1); \ + if (((carry) = a >= DIG_BASE)) /* no division here! 
*/ \ + a -= DIG_BASE; \ + (to) = a; \ + } while (0) + +static unsigned long lfactor[9] = {1L, 10L, 100L, 1000L, 10000L, + 100000L, 1000000L, 10000000L, 100000000L}; +static const dec1 frac_max[DIG_PER_DEC1 - 1] = {900000000, 990000000, 999000000, 999900000, + 999990000, 999999000, 999999900, 999999990}; + +static void do_mini_right_shift(decimal_t *dec, int shift, int beg, int last) +{ + dec1 *from = dec->buf + ROUND_UP(last) - 1; + dec1 *end = dec->buf + ROUND_UP(beg + 1) - 1; + int c_shift = DIG_PER_DEC1 - shift; + if (DIG_PER_DEC1 - ((last - 1) % DIG_PER_DEC1 + 1) < shift) + *(from + 1) = (*from % powers10[shift]) * powers10[c_shift]; + for (; from > end; from--) + *from = (*from / powers10[shift] + (*(from - 1) % powers10[shift]) * powers10[c_shift]); + *from = *from / powers10[shift]; +} + +static inline int count_leading_zeroes(int i, dec1 val) +{ + int ret = 0; + switch (i) { + /* @note Intentional fallthrough in all case labels */ + case 9: + if (val >= 1000000000) + break; + ++ret; + case 8: + if (val >= 100000000) + break; + ++ret; + case 7: + if (val >= 10000000) + break; + ++ret; + case 6: + if (val >= 1000000) + break; + ++ret; + case 5: + if (val >= 100000) + break; + ++ret; + case 4: + if (val >= 10000) + break; + ++ret; + case 3: + if (val >= 1000) + break; + ++ret; + case 2: + if (val >= 100) + break; + ++ret; + case 1: + if (val >= 10) + break; + ++ret; + case 0: + if (val >= 1) + break; + ++ret; + default: { + return ret; + } + } + return ret; +} + +static inline int count_trailing_zeroes(int i, dec1 val) +{ + uint32_t uval = val; + + int ret = 0; + switch (i) { + /* @note Intentional fallthrough in all case labels */ + case 0: + if ((uval % 1) != 0) + break; + ++ret; + case 1: + if ((uval % 10) != 0) + break; + ++ret; + case 2: + if ((uval % 100) != 0) + break; + ++ret; + case 3: + if ((uval % 1000) != 0) + break; + ++ret; + case 4: + if ((uval % 10000) != 0) + break; + ++ret; + case 5: + if ((uval % 100000) != 0) + break; + ++ret; + case 6: + 
if ((uval % 1000000) != 0) + break; + ++ret; + case 7: + if ((uval % 10000000) != 0) + break; + ++ret; + case 8: + if ((uval % 100000000) != 0) + break; + ++ret; + case 9: + if ((uval % 1000000000) != 0) + break; + ++ret; + default: { + } + } + return ret; +} + +static inline void decimal_make_zero(decimal_t *dec) +{ + dec->buf[0] = 0; + dec->intg = 1; + dec->frac = 0; + dec->sign = false; +} + +static void do_mini_left_shift(decimal_t *dec, int shift, int beg, int last) +{ + dec1 *from = dec->buf + ROUND_UP(beg + 1) - 1; + dec1 *end = dec->buf + ROUND_UP(last) - 1; + int c_shift = DIG_PER_DEC1 - shift; + if (beg % DIG_PER_DEC1 < shift) + *(from - 1) = (*from) / powers10[c_shift]; + for (; from < end; from++) + *from = ((*from % powers10[c_shift]) * powers10[shift] + (*(from + 1)) / powers10[c_shift]); + *from = (*from % powers10[c_shift]) * powers10[shift]; +} + +static void digits_bounds(const decimal_t *from, int *start_result, int *end_result) +{ + int start, stop, i; + dec1 *buf_beg = from->buf; + dec1 *end = from->buf + ROUND_UP(from->intg) + ROUND_UP(from->frac); + dec1 *buf_end = end - 1; + + /* find non-zero digit from number beginning */ + while (buf_beg < end && *buf_beg == 0) + buf_beg++; + + if (buf_beg >= end) { + /* it is zero */ + *start_result = *end_result = 0; + return; + } + + /* find non-zero decimal digit from number beginning */ + if (buf_beg == from->buf && from->intg) { + start = DIG_PER_DEC1 - (i = ((from->intg - 1) % DIG_PER_DEC1 + 1)); + i--; + } else { + i = DIG_PER_DEC1 - 1; + start = (int)((buf_beg - from->buf) * DIG_PER_DEC1); + } + if (buf_beg < end) + start += count_leading_zeroes(i, *buf_beg); + + *start_result = start; /* index of first decimal digit (from 0) */ + + /* find non-zero digit at the end */ + while (buf_end > buf_beg && *buf_end == 0) + buf_end--; + /* find non-zero decimal digit from the end */ + if (buf_end == end - 1 && from->frac) { + stop = (int)(((buf_end - from->buf) * DIG_PER_DEC1 + + (i = ((from->frac - 1) % 
DIG_PER_DEC1 + 1)))); + i = DIG_PER_DEC1 - i + 1; + } else { + stop = (int)((buf_end - from->buf + 1) * DIG_PER_DEC1); + i = 1; + } + stop -= count_trailing_zeroes(i, *buf_end); + *end_result = stop; /* index of position after last decimal digit (from 0) */ +} + +int decimal_is_zero(const decimal_t *from) +{ + dec1 *buf1 = from->buf, *end = buf1 + ROUND_UP(from->intg) + ROUND_UP(from->frac); + while (buf1 < end) + if (*buf1++) + return 0; + return 1; +} + +int decimal_round(const decimal_t *from, decimal_t *to, int scale, decimal_round_mode mode) +{ + int frac0 = scale > 0 ? ROUND_UP(scale) : (scale + 1) / DIG_PER_DEC1, + frac1 = ROUND_UP(from->frac), round_digit = 0, intg0 = ROUND_UP(from->intg), + error = E_DEC_OK, len = to->len; + + dec1 *buf0 = from->buf, *buf1 = to->buf, x, y, carry = 0; + int first_dig; + + sanity(to); + + switch (mode) { + case HALF_UP: + case HALF_EVEN: + round_digit = 5; + break; + case CEILING: + round_digit = from->sign ? 10 : 0; + break; + case FLOOR: + round_digit = from->sign ? 
0 : 10; + break; + case TRUNCATE: + round_digit = 10; + break; + default: + assert(0); + } + + /* + For my_decimal we always use len == DECIMAL_BUFF_LENGTH == 9 + For internal testing here (ifdef MAIN) we always use len == 100/4 + */ + assert(from->len == to->len); + + if (unlikely(frac0 + intg0 > len)) { + frac0 = len - intg0; + scale = frac0 * DIG_PER_DEC1; + error = E_DEC_TRUNCATED; + } + + if (scale + from->intg < 0) { + decimal_make_zero(to); + return E_DEC_OK; + } + + if (to != from) { + dec1 *p0 = buf0 + intg0 + std::max(frac1, frac0); + dec1 *p1 = buf1 + intg0 + std::max(frac1, frac0); + + assert(p0 - buf0 <= len); + assert(p1 - buf1 <= len); + + while (buf0 < p0) + *(--p1) = *(--p0); + + buf0 = to->buf; + buf1 = to->buf; + to->sign = from->sign; + to->intg = std::min(intg0, len) * DIG_PER_DEC1; + } + + if (frac0 > frac1) { + buf1 += intg0 + frac1; + while (frac0-- > frac1) + *buf1++ = 0; + goto done; + } + + if (scale >= from->frac) + goto done; /* nothing to do */ + + buf0 += intg0 + frac0 - 1; + buf1 += intg0 + frac0 - 1; + if (scale == frac0 * DIG_PER_DEC1) { + int do_inc = false; + assert(frac0 + intg0 >= 0); + switch (round_digit) { + case 0: { + dec1 *p0 = buf0 + (frac1 - frac0); + for (; p0 > buf0; p0--) { + if (*p0) { + do_inc = true; + break; + } + } + break; + } + case 5: { + x = buf0[1] / DIG_MASK; + do_inc = + (x > 5) || ((x == 5) && (mode == HALF_UP || (frac0 + intg0 > 0 && *buf0 & 1))); + break; + } + default: + break; + } + if (do_inc) { + if (frac0 + intg0 > 0) + (*buf1)++; + else + *(++buf1) = DIG_BASE; + } else if (frac0 + intg0 == 0) { + decimal_make_zero(to); + return E_DEC_OK; + } + } else { + /* TODO - fix this code as it won't work for CEILING mode */ + int pos = frac0 * DIG_PER_DEC1 - scale - 1; + assert(frac0 + intg0 > 0); + x = *buf1 / powers10[pos]; + y = x % 10; + if (y > round_digit || (round_digit == 5 && y == 5 && (mode == HALF_UP || (x / 10) & 1))) + x += 10; + *buf1 = powers10[pos] * (x - y); + } + /* + In case we're 
rounding e.g. 1.5e9 to 2.0e9, the decimal_digit_t's inside + the buffer are as follows. + + Before <1, 5e8> + After <2, 5e8> + + Hence we need to set the 2nd field to 0. + The same holds if we round 1.5e-9 to 2e-9. + */ + if (frac0 < frac1) { + dec1 *buf = to->buf + ((scale == 0 && intg0 == 0) ? 1 : intg0 + frac0); + dec1 *end = to->buf + len; + + while (buf < end) + *buf++ = 0; + } + if (*buf1 >= DIG_BASE) { + carry = 1; + *buf1 -= DIG_BASE; + while (carry && --buf1 >= to->buf) + ADD(*buf1, *buf1, 0, carry); + if (unlikely(carry)) { + /* shifting the number to create space for new digit */ + if (frac0 + intg0 >= len) { + frac0--; + scale = frac0 * DIG_PER_DEC1; + error = E_DEC_TRUNCATED; /* XXX */ + } + for (buf1 = to->buf + intg0 + std::max(frac0, 0); buf1 > to->buf; buf1--) { + /* Avoid out-of-bounds write. */ + if (buf1 < to->buf + len) + buf1[0] = buf1[-1]; + else + error = E_DEC_OVERFLOW; + } + *buf1 = 1; + /* We cannot have more than 9 * 9 = 81 digits. */ + if (to->intg < len * DIG_PER_DEC1) + to->intg++; + else + error = E_DEC_OVERFLOW; + } + } else { + for (;;) { + if (likely(*buf1)) + break; + if (buf1-- == to->buf) { + /* making 'zero' with the proper scale */ + dec1 *p0 = to->buf + frac0 + 1; + to->intg = 1; + to->frac = std::max(scale, 0); + to->sign = false; + for (buf1 = to->buf; buf1 < p0; buf1++) + *buf1 = 0; + return E_DEC_OK; + } + } + } + + /* Here we check 999.9 -> 1000 case when we need to increase intg */ + first_dig = to->intg % DIG_PER_DEC1; + if (first_dig && (*buf1 >= powers10[first_dig])) + to->intg++; + + if (scale < 0) + scale = 0; + +done: + assert(to->intg <= (len * DIG_PER_DEC1)); + to->frac = scale; + return error; +} + +inline void fix_intg_frac_error(const int &len, int *intg1, int *frac1, int *error) +{ + if (*intg1 + *frac1 > len) { + if (*intg1 > len) { + *intg1 = len; + *frac1 = 0; + *error = E_DEC_OVERFLOW; + } else { + *frac1 = len - *intg1; + *error = E_DEC_TRUNCATED; + } + } else + *error = E_DEC_OK; +} + +longlong 
my_strtoll10(const char *nptr, const char **endptr, int *error) +{ + const char *s, *end, *start, *n_end, *true_end; + const char *dummy; + uchar c; + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + + s = nptr; + /* If fixed length string */ + if (endptr) { + end = *endptr; + while (s != end && (*s == ' ' || *s == '\t')) + s++; + if (s == end) + goto no_conv; + } else { + endptr = &dummy; /* Easier end test */ + while (*s == ' ' || *s == '\t') + s++; + if (!*s) + goto no_conv; + /* This number must be big to guard against a lot of pre-zeros */ + end = s + 65535; /* Can't be longer than this */ + } + + /* Check for a sign. */ + negative = 0; + if (*s == '-') { + *error = -1; /* Mark as negative number */ + negative = 1; + if (++s == end) + goto no_conv; + cutoff = MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2 = (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3 = MAX_NEGATIVE_NUMBER % 100; + } else { + *error = 0; + if (*s == '+') { + if (++s == end) + goto no_conv; + } + cutoff = ULLONG_MAX / LFACTOR2; + cutoff2 = ULLONG_MAX % LFACTOR2 / 100; + cutoff3 = ULLONG_MAX % 100; + } + + /* Handle case where we have a lot of pre-zero */ + if (*s == '0') { + i = 0; + do { + if (++s == end) + goto end_i; /* Return 0 */ + } while (*s == '0'); + n_end = s + INIT_CNT; + } else { + /* Read first digit to check that it's a valid number */ + if ((c = (*s - '0')) > 9) + goto no_conv; + i = c; + n_end = ++s + INIT_CNT - 1; + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) + n_end = end; + for (; s != n_end; s++) { + if ((c = (*s - '0')) > 9) + goto end_i; + i = i * 10 + c; + } + if (s == end) + goto end_i; + + /* Handle next 9 digits and store them in j */ + j = 0; + start = s; /* Used to know how much to shift i */ + n_end = true_end = s + INIT_CNT; + if (n_end > end) + n_end = end; + do { + if ((c = (*s - '0')) > 9) + goto end_i_and_j; + j = j * 10 + c; + } while (++s != n_end); + if (s == end) { + if (s != true_end) + 
goto end_i_and_j; + goto end3; + } + if ((c = (*s - '0')) > 9) + goto end3; + + /* Handle the next 1 or 2 digits and store them in k */ + k = c; + if (++s == end || (c = (*s - '0')) > 9) + goto end4; + k = k * 10 + c; + *endptr = ++s; + + /* number string should have ended here */ + if (s != end && (c = (*s - '0')) <= 9) + goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || (i == cutoff && (j > cutoff2 || (j == cutoff2 && k > cutoff3)))) + goto overflow; + li = i * LFACTOR2 + (ulonglong)j * 100 + k; + return (longlong)li; + +overflow: /* *endptr is set here */ + *error = MY_ERRNO_ERANGE; + return negative ? LLONG_MIN : (longlong)ULLONG_MAX; + +end_i: + *endptr = s; + return (negative ? ((longlong) - (long)i) : (longlong)i); + +end_i_and_j: + li = (ulonglong)i * lfactor[(uint)(s - start)] + j; + *endptr = s; + return (negative ? -((longlong)li) : (longlong)li); + +end3: + li = (ulonglong)i * LFACTOR + (ulonglong)j; + *endptr = s; + return (negative ? -((longlong)li) : (longlong)li); + +end4: + li = (ulonglong)i * LFACTOR1 + (ulonglong)j * 10 + k; + *endptr = s; + if (negative) { + if (li > MAX_NEGATIVE_NUMBER) + goto overflow; + if (li == MAX_NEGATIVE_NUMBER) + return LLONG_MIN; + return -((longlong)li); + } + return (longlong)li; + +no_conv: + /* There was no number to convert. 
*/ + *error = MY_ERRNO_EDOM; + *endptr = nptr; + return 0; +} + +int decimal_shift(decimal_t *dec, int shift) +{ + /* index of first non zero digit (all indexes from 0) */ + int beg; + /* index of position after last decimal digit */ + int end; + /* index of digit position just after point */ + int point = ROUND_UP(dec->intg) * DIG_PER_DEC1; + /* new point position */ + int new_point = point + shift; + /* length of result and new fraction in big digits */ + int new_len, new_frac_len; + /* return code */ + int err = E_DEC_OK; + int new_front; + + if (shift == 0) + return E_DEC_OK; + + digits_bounds(dec, &beg, &end); + + if (beg == end) { + decimal_make_zero(dec); + return E_DEC_OK; + } + + /* number of digits in result */ + int digits_int = std::max(new_point - beg, 0); + int digits_frac = std::max(end - new_point, 0); + + if ((new_len = ROUND_UP(digits_int) + (new_frac_len = ROUND_UP(digits_frac))) > dec->len) { + int lack = new_len - dec->len; + int diff; + + if (new_frac_len < lack) + return E_DEC_OVERFLOW; /* lack more then we have in fraction */ + + /* cat off fraction part to allow new number to fit in our buffer */ + err = E_DEC_TRUNCATED; + new_frac_len -= lack; + diff = digits_frac - (new_frac_len * DIG_PER_DEC1); + /* Make rounding method as parameter? 
*/ + decimal_round(dec, dec, end - point - diff, HALF_UP); + end -= diff; + digits_frac = new_frac_len * DIG_PER_DEC1; + + if (end <= beg) { + /* + we lost all digits (they will be shifted out of buffer), so we can + just return 0 + */ + decimal_make_zero(dec); + return E_DEC_TRUNCATED; + } + } + + if (shift % DIG_PER_DEC1) { + int l_mini_shift, r_mini_shift, mini_shift; + int do_left; + /* + Calculate left/right shift to align decimal digits inside our bug + digits correctly + */ + if (shift > 0) { + l_mini_shift = shift % DIG_PER_DEC1; + r_mini_shift = DIG_PER_DEC1 - l_mini_shift; + /* + It is left shift so prefer left shift, but if we have not place from + left, we have to have it from right, because we checked length of + result + */ + do_left = l_mini_shift <= beg; + } else { + r_mini_shift = (-shift) % DIG_PER_DEC1; + l_mini_shift = DIG_PER_DEC1 - r_mini_shift; + /* see comment above */ + do_left = !((dec->len * DIG_PER_DEC1 - end) >= r_mini_shift); + } + if (do_left) { + do_mini_left_shift(dec, l_mini_shift, beg, end); + mini_shift = -l_mini_shift; + } else { + do_mini_right_shift(dec, r_mini_shift, beg, end); + mini_shift = r_mini_shift; + } + new_point += mini_shift; + /* + If number is shifted and correctly aligned in buffer we can + finish + */ + if (!(shift += mini_shift) && (new_point - digits_int) < DIG_PER_DEC1) { + dec->intg = digits_int; + dec->frac = digits_frac; + return err; /* already shifted as it should be */ + } + beg += mini_shift; + end += mini_shift; + } + + /* if new 'decimal front' is in first digit, we do not need move digits */ + if ((new_front = (new_point - digits_int)) >= DIG_PER_DEC1 || new_front < 0) { + /* need to move digits */ + int d_shift; + dec1 *to, *barier; + if (new_front > 0) { + /* move left */ + d_shift = new_front / DIG_PER_DEC1; + to = dec->buf + (ROUND_UP(beg + 1) - 1 - d_shift); + barier = dec->buf + (ROUND_UP(end) - 1 - d_shift); + for (; to <= barier; to++) + *to = *(to + d_shift); + for (barier += d_shift; to 
<= barier; to++) + *to = 0; + d_shift = -d_shift; + } else { + /* move right */ + d_shift = (1 - new_front) / DIG_PER_DEC1; + to = dec->buf + ROUND_UP(end) - 1 + d_shift; + barier = dec->buf + ROUND_UP(beg + 1) - 1 + d_shift; + for (; to >= barier; to--) + *to = *(to - d_shift); + for (barier -= d_shift; to >= barier; to--) + *to = 0; + } + d_shift *= DIG_PER_DEC1; + beg += d_shift; + end += d_shift; + new_point += d_shift; + } + + /* + If there are gaps then fill ren with 0. + + Only one of following 'for' loops will work because beg <= end + */ + beg = ROUND_UP(beg + 1) - 1; + end = ROUND_UP(end) - 1; + + /* We don't want negative new_point below */ + if (new_point != 0) + new_point = ROUND_UP(new_point) - 1; + + if (new_point > end) { + do { + dec->buf[new_point] = 0; + } while (--new_point > end); + } else { + for (; new_point < beg; new_point++) + dec->buf[new_point] = 0; + } + dec->intg = digits_int; + dec->frac = digits_frac; + return err; +} + +int string2decimal(const char *from, decimal_t *to, const char **end) +{ + const char *s = from, *s1, *endp, *end_of_string = *end; + int i, intg, frac, error, intg1, frac1; + dec1 x, *buf; + sanity(to); + + error = E_DEC_BAD_NUM; /* In case of bad number */ + while (s < end_of_string && isspace(*s)) + s++; + if (s == end_of_string) + goto fatal_error; + + // Skip leading zeros. 
+ while (s < (end_of_string - 1) && s[0] == '0' && s[1] == '0') + s++; + + if ((to->sign = (*s == '-'))) + s++; + else if (*s == '+') + s++; + + s1 = s; + while (s < end_of_string && isdigit(*s)) + s++; + intg = (int)(s - s1); + if (s < end_of_string && *s == '.') { + endp = s + 1; + while (endp < end_of_string && isdigit(*endp)) + endp++; + frac = (int)(endp - s - 1); + } else { + frac = 0; + endp = s; + } + + *end = endp; + if (frac + intg == 0) + goto fatal_error; + + error = 0; + + intg1 = ROUND_UP(intg); + frac1 = ROUND_UP(frac); + fix_intg_frac_error(to->len, &intg1, &frac1, &error); + if (unlikely(error)) { + frac = frac1 * DIG_PER_DEC1; + if (error == E_DEC_OVERFLOW) + intg = intg1 * DIG_PER_DEC1; + } + + /* Error is guaranteed to be set here */ + to->intg = intg; + to->frac = frac; + + buf = to->buf + intg1; + s1 = s; + + for (x = 0, i = 0; intg; intg--) { + x += (*--s - '0') * powers10[i]; + + if (unlikely(++i == DIG_PER_DEC1)) { + *--buf = x; + x = 0; + i = 0; + } + } + if (i) + *--buf = x; + + buf = to->buf + intg1; + for (x = 0, i = 0; frac; frac--) { + x = (*++s1 - '0') + x * 10; + + if (unlikely(++i == DIG_PER_DEC1)) { + *buf++ = x; + x = 0; + i = 0; + } + } + if (i) + *buf = x * powers10[DIG_PER_DEC1 - i]; + + /* Handle exponent */ + if (endp + 1 < end_of_string && (*endp == 'e' || *endp == 'E')) { + int str_error; + longlong exponent = my_strtoll10(endp + 1, &end_of_string, &str_error); + + if (end_of_string != endp + 1) /* If at least one digit */ + { + *end = end_of_string; + if (str_error > 0) { + error = E_DEC_BAD_NUM; + goto fatal_error; + } + if (exponent > INT_MAX / 2 || (str_error == 0 && exponent < 0)) { + error = E_DEC_OVERFLOW; + goto fatal_error; + } + if (exponent < INT_MIN / 2 && error != E_DEC_OVERFLOW) { + error = E_DEC_TRUNCATED; + goto fatal_error; + } + if (error != E_DEC_OVERFLOW) + error = decimal_shift(to, (int)exponent); + } + } + /* Avoid returning negative zero, cfr. 
decimal_cmp() */ + if (to->sign && decimal_is_zero(to)) + to->sign = false; + return error; + +fatal_error: + decimal_make_zero(to); + return error; +} + +int decimal2bin(const decimal_t *from, uchar *to, int precision, int frac) +{ + dec1 mask = from->sign ? -1 : 0, *buf1 = from->buf, *stop1; + int error = E_DEC_OK, intg = precision - frac, isize1, intg1, intg1x, from_intg, + intg0 = intg / DIG_PER_DEC1, frac0 = frac / DIG_PER_DEC1, + intg0x = intg - intg0 * DIG_PER_DEC1, frac0x = frac - frac0 * DIG_PER_DEC1, + frac1 = from->frac / DIG_PER_DEC1, frac1x = from->frac - frac1 * DIG_PER_DEC1, + isize0 = intg0 * sizeof(dec1) + dig2bytes[intg0x], + fsize0 = frac0 * sizeof(dec1) + dig2bytes[frac0x], + fsize1 = frac1 * sizeof(dec1) + dig2bytes[frac1x]; + const int orig_isize0 = isize0; + const int orig_fsize0 = fsize0; + uchar *orig_to = to; + + buf1 = remove_leading_zeroes(from, &from_intg); + + if (unlikely(from_intg + fsize1 == 0)) { + mask = 0; /* just in case */ + intg = 1; + buf1 = &mask; + } + + intg1 = from_intg / DIG_PER_DEC1; + intg1x = from_intg - intg1 * DIG_PER_DEC1; + isize1 = intg1 * sizeof(dec1) + dig2bytes[intg1x]; + + if (intg < from_intg) { + buf1 += intg1 - intg0 + (intg1x > 0) - (intg0x > 0); + intg1 = intg0; + intg1x = intg0x; + error = E_DEC_OVERFLOW; + } else if (isize0 > isize1) { + while (isize0-- > isize1) + *to++ = (char)mask; + } + if (fsize0 < fsize1) { + frac1 = frac0; + frac1x = frac0x; + error = E_DEC_TRUNCATED; + } else if (fsize0 > fsize1 && frac1x) { + if (frac0 == frac1) { + frac1x = frac0x; + fsize0 = fsize1; + } else { + frac1++; + frac1x = 0; + } + } + + /* intg1x part */ + if (intg1x) { + int i = dig2bytes[intg1x]; + dec1 x = mod_by_pow10(*buf1++, intg1x) ^ mask; + switch (i) { + case 1: + mi_int1store(to, x); + break; + case 2: + mi_int2store(to, x); + break; + case 3: + mi_int3store(to, x); + break; + case 4: + mi_int4store(to, x); + break; + default: + break; + } + to += i; + } + + /* intg1+frac1 part */ + for (stop1 = buf1 + 
intg1 + frac1; buf1 < stop1; to += sizeof(dec1)) { + dec1 x = *buf1++ ^ mask; + mi_int4store(to, x); + } + + /* frac1x part */ + if (frac1x) { + dec1 x; + int i = dig2bytes[frac1x], lim = (frac1 < frac0 ? DIG_PER_DEC1 : frac0x); + while (frac1x < lim && dig2bytes[frac1x] == i) + frac1x++; + x = div_by_pow10(*buf1, DIG_PER_DEC1 - frac1x) ^ mask; + switch (i) { + case 1: + mi_int1store(to, x); + break; + case 2: + mi_int2store(to, x); + break; + case 3: + mi_int3store(to, x); + break; + case 4: + mi_int4store(to, x); + break; + default: + break; + } + to += i; + } + if (fsize0 > fsize1) { + uchar *to_end = orig_to + orig_fsize0 + orig_isize0; + + while (fsize0-- > fsize1 && to < to_end) + *to++ = (uchar)mask; + } + orig_to[0] ^= 0x80; + + return error; +} diff --git a/binlogconvert/src/utils/my_time.cpp b/binlogconvert/src/utils/my_time.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6cf08e48b3d858d12f10423a7fe5dad8142b3c94 --- /dev/null +++ b/binlogconvert/src/utils/my_time.cpp @@ -0,0 +1,758 @@ +/* Copyright (c) 2004, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + Without limiting anything contained in the foregoing, this file, + which is part of C Driver for MySQL (Connector/C), is also subject to the + Universal FOSS Exception, version 1.0, a copy of which can be found at + http://oss.oracle.com/licenses/universal-foss-exception. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: mysys/my_time.cc +#include "utils/my_time.h" + +#include +#include + +#include +#include +#include +#include + +#include "common/logging.h" + +#define TIMEF_OFS 0x800000000000LL +#define TIMEF_INT_OFS 0x800000LL +#define DATETIMEF_INT_OFS 0x8000000000LL +#define EPOCH_YEAR 1970 +#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400) +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + +constexpr const int SECS_PER_MIN = 60; +constexpr const int HOURS_PER_DAY = 24; +constexpr const int DAYS_PER_WEEK = 7; +constexpr const int DAYS_PER_NYEAR = 365; +constexpr const int DAYS_PER_LYEAR = 366; +constexpr const int SECS_PER_HOUR = (SECS_PER_MIN * MINS_PER_HOUR); +constexpr const int SECS_PER_DAY = (SECS_PER_HOUR * HOURS_PER_DAY); +constexpr const int MONS_PER_YEAR = 12; +constexpr const int MAX_TIME_ZONE_HOURS = 14; +#define MAX_DATE_PARTS 8 + +const ulonglong log_10_int[20] = {1, + 10, + 100, + 1000, + 10000UL, + 100000UL, + 1000000UL, + 10000000UL, + 100000000ULL, + 1000000000ULL, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + 10000000000000000000ULL}; + +static constexpr const char time_separator = ':'; +static constexpr ulong const days_at_timestart = 719528; + +static uint64_t my_time_zone = 0; + +static const uint mon_starts[2][MONS_PER_YEAR] = { + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}, + 
{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}}; + +static longlong my_packed_time_make(longlong i, longlong f) +{ + if (std::abs(f) > 0xffffffLL) { + // LOG_ERROR("TIME field should be not exceed 0xffffff..."); + return -1; + } + return (static_cast(i) << 24) + f; +} + +inline bool is_time_t_valid_for_timestamp(time_t x) +{ + return (static_cast(x) <= static_cast(MYTIME_MAX_VALUE) && + x >= MYTIME_MIN_VALUE); +} + +static longlong my_packed_time_get_int_part(longlong i) +{ + return (i >> 24); +} + +static inline int isspace_char(char ch) +{ + return std::isspace(static_cast(ch)); +} + +static inline int isdigit_char(char ch) +{ + return std::isdigit(static_cast(ch)); +} + +static inline int ispunct_char(char ch) +{ + return std::ispunct(static_cast(ch)); +} + +long calc_daynr(uint year, uint month, uint day) +{ + long delsum; + int temp; + int y = year; /* may be < 0 temporarily */ + + if (y == 0 && month == 0) + return 0; /* Skip errors */ + /* Cast to int to be able to handle month == 0 */ + delsum = + static_cast(365 * y + 31 * (static_cast(month) - 1) + static_cast(day)); + if (month <= 2) + y--; + else + delsum -= static_cast(static_cast(month) * 4 + 23) / 10; + temp = ((y / 100 + 1) * 3) / 4; + if (delsum + static_cast(y) / 4 - temp < 0) { + // LOG_ERROR("TIME field when cal days, error"); + return -1; + } + return (delsum + static_cast(y) / 4 - temp); +} /* calc_daynr */ + +int64_t my_system_gmt_sec(const MYSQL_TIME &my_time, int64_t *my_timezone) +{ + uint loop; + time_t tmp = 0; + int shift = 0; + MYSQL_TIME tmp_time; + MYSQL_TIME *t = &tmp_time; + struct tm *l_time; + struct tm tm_tmp; + uint64_t diff, current_timezone; + + tmp_time = my_time; + + if ((t->year == 9999) && (t->month == 1) && (t->day > 4)) { + t->day -= 2; + shift = 2; + } + + int64_t tmp_days = calc_daynr(static_cast(t->year), static_cast(t->month), + static_cast(t->day)); + tmp_days = tmp_days - static_cast(days_at_timestart); + int64_t tmp_seconds = + tmp_days * SECONDS_IN_24H 
+ + (static_cast(t->hour) * 3600 + static_cast(t->minute * 60 + t->second)); + // This will be a narrowing on 32 bit time platforms, but checked range + // above + tmp = static_cast(tmp_seconds + my_time_zone - 3600); + + current_timezone = my_time_zone; + localtime_r(&tmp, &tm_tmp); + l_time = &tm_tmp; + for (loop = 0; loop < 2 && (t->hour != static_cast(l_time->tm_hour) || + t->minute != static_cast(l_time->tm_min) || + t->second != static_cast(l_time->tm_sec)); + loop++) { /* One check should be enough ? */ + /* Get difference in days */ + int days = t->day - l_time->tm_mday; + if (days < -1) + days = 1; /* Month has wrapped */ + else if (days > 1) + days = -1; + diff = + (3600L * static_cast(days * 24 + (static_cast(t->hour) - l_time->tm_hour)) + + static_cast(60 * (static_cast(t->minute) - l_time->tm_min)) + + static_cast(static_cast(t->second) - l_time->tm_sec)); + current_timezone += diff + 3600; /* Compensate for -3600 above */ + tmp += static_cast(diff); + localtime_r(&tmp, &tm_tmp); + l_time = &tm_tmp; + } + + if (loop == 2 && t->hour != static_cast(l_time->tm_hour)) { + int days = t->day - l_time->tm_mday; + if (days < -1) + days = 1; /* Month has wrapped */ + else if (days > 1) + days = -1; + diff = + (3600L * static_cast(days * 24 + (static_cast(t->hour) - l_time->tm_hour)) + + static_cast(60 * (static_cast(t->minute) - l_time->tm_min)) + + static_cast(static_cast(t->second) - l_time->tm_sec)); + if (diff == 3600) + tmp += 3600 - t->minute * 60 - t->second; /* Move to next hour */ + else if (diff == -3600) + tmp -= t->minute * 60 + t->second; /* Move to previous hour */ + } + *my_timezone = current_timezone; + + /* shift back, if we were dealing with boundary dates */ + tmp += shift * SECONDS_IN_24H; + + if (!is_time_t_valid_for_timestamp(tmp)) + tmp = 0; + + return static_cast(tmp); +} + +bool check_datetime_range(const MYSQL_TIME &my_time) +{ + /* + In case of MYSQL_TIMESTAMP_TIME hour value can be up to TIME_MAX_HOUR. 
+ In case of MYSQL_TIMESTAMP_DATETIME it cannot be bigger than 23. + */ + return my_time.year > 9999U || my_time.month > 12U || my_time.day > 31U || + my_time.minute > 59U || my_time.second > 59U || my_time.second_part > 999999U || + (my_time.hour > (my_time.time_type == MYSQL_TIMESTAMP_TIME ? TIME_MAX_HOUR : 23U)); +} + +bool time_zone_displacement_to_seconds(const char *str, size_t length, int *result) +{ + if (length < 6) + return true; + + int sign = str[0] == '+' ? 1 : (str[0] == '-' ? -1 : 0); + if (sign == 0) + return true; + + if (!(std::isdigit(str[1]) && std::isdigit(str[2]))) + return true; + int hours = (str[1] - '0') * 10 + str[2] - '0'; + + if (str[3] != ':') + return true; + + if (!(std::isdigit(str[4]) && std::isdigit(str[5]))) + return true; + int minutes = (str[4] - '0') * 10 + str[5] - '0'; + if (minutes >= MINS_PER_HOUR) + return true; + int seconds = hours * SECS_PER_HOUR + minutes * SECS_PER_MIN; + + if (seconds > MAX_TIME_ZONE_HOURS * SECS_PER_HOUR) + return true; + + // The SQL standard forbids -00:00. + if (sign == -1 && hours == 0 && minutes == 0) + return true; + + for (size_t i = 6; i < length; ++i) + if (!std::isspace(str[i])) + return true; + + *result = seconds * sign; + return false; +} + +longlong TIME_to_longlong_time_packed(const MYSQL_TIME &my_time) +{ + /* If month is 0, we mix day with hours: "1 00:10:10" -> "24:00:10" */ + long hms = (((my_time.month ? 0 : my_time.day * 24) + my_time.hour) << 12) | + (my_time.minute << 6) | my_time.second; + longlong tmp = my_packed_time_make(hms, my_time.second_part); + return my_time.neg ? 
-tmp : tmp; +} + +void my_time_packed_to_binary(longlong nr, uchar *ptr, uint dec) +{ + bool flag1 = (dec <= DATETIME_MAX_DECIMALS); + bool flag2 = ((my_packed_time_get_frac_part(nr) % + static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0); + + if (!flag1 || !flag2) { + LOG_ERROR( + "[TIME FIELD] Make sure the stored value was previously properly rounded or truncated"); + return; + } + + switch (dec) { + case 0: + default: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + break; + + case 1: + case 2: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + ptr[3] = static_cast( + static_cast(my_packed_time_get_frac_part(nr) / 10000)); + break; + + case 4: + case 3: + mi_int3store(ptr, TIMEF_INT_OFS + my_packed_time_get_int_part(nr)); + mi_int2store(ptr + 3, my_packed_time_get_frac_part(nr) / 100); + break; + + case 5: + case 6: + mi_int6store(ptr, nr + TIMEF_OFS); + break; + } +} + +longlong TIME_to_longlong_datetime_packed(const MYSQL_TIME &my_time) +{ + longlong ymd = ((my_time.year * 13 + my_time.month) << 5) | my_time.day; + longlong hms = (my_time.hour << 12) | (my_time.minute << 6) | my_time.second; + longlong tmp = my_packed_time_make(((ymd << 17) | hms), my_time.second_part); + if (check_datetime_range(my_time)) { + + // LOG_ERROR("[DATETIME field] Make sure no overflow"); + return -1; + } + return my_time.neg ? 
-tmp : tmp; +} + +void my_datetime_packed_to_binary(longlong nr, uchar *ptr, uint dec) +{ + if (dec > DATETIME_MAX_DECIMALS) { + // LOG_ERROR("[DATETIME field] dec exceeds the maximum allowed value of + // DATETIME_MAX_DECIMALS"); + return; + } + + if (!((my_packed_time_get_frac_part(nr) % + static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0)) { + // LOG_ERROR("[DATETIME field] The value being stored must have been properly rounded or + // truncated"); + return; + } + + mi_int5store(ptr, my_packed_time_get_int_part(nr) + DATETIMEF_INT_OFS); + switch (dec) { + case 0: + default: + break; + case 1: + case 2: + ptr[5] = static_cast( + static_cast(my_packed_time_get_frac_part(nr) / 10000)); + break; + case 3: + case 4: + mi_int2store(ptr + 5, my_packed_time_get_frac_part(nr) / 100); + break; + case 5: + case 6: + mi_int3store(ptr + 5, my_packed_time_get_frac_part(nr)); + } +} + +void my_timestamp_to_binary(const my_timeval *tm, uchar *ptr, uint dec) +{ + bool flag1 = (dec <= DATETIME_MAX_DECIMALS); + bool flag2 = ((tm->m_tv_usec % static_cast(log_10_int[DATETIME_MAX_DECIMALS - dec])) == 0); + + if (!flag1 || !flag2) { + // LOG_ERROR("[DATETIME field] Stored value must have been previously properly rounded or + // truncated"); + return; + } + + mi_int4store(ptr, tm->m_tv_sec); + switch (dec) { + case 0: + default: + break; + case 1: + case 2: + ptr[4] = static_cast(static_cast(tm->m_tv_usec / 10000)); + break; + case 3: + case 4: + mi_int2store(ptr + 4, tm->m_tv_usec / 100); + break; + /* Impossible second precision. 
Fall through */ + case 5: + case 6: + mi_int3store(ptr + 4, tm->m_tv_usec); + } +} + +/* + [-] DAYS [H]H:MM:SS, [H]H:MM:SS, [H]H:MM, [H]HMMSS,[M]MSS or [S]S +*/ +void str_to_time(const char *str, std::size_t length, MYSQL_TIME *l_time) +{ + ulong date[5]; + ulonglong value; + uint state; + bool seen_colon = false; + const char *end = str + length; + const char *end_of_days; + bool found_days; + bool found_hours; + const char *start; + const char *str_arg = str; + + l_time->time_type = MYSQL_TIMESTAMP_NONE; + l_time->neg = false; + + for (; str != end && isspace_char(*str); str++) { + length--; + } + + if (str != end && *str == '-') { + l_time->neg = true; + str++; + length--; + } + + if (str == end) + return; + start = str; + + for (value = 0; str != end && isdigit_char(*str); str++) + value = value * 10L + static_cast(*str - '0'); + if (value > UINT_MAX) + return; + end_of_days = str; + + int spaces = 0; + for (; str != end && isspace_char(str[0]); str++) + spaces++; + + state = 0; + found_days = found_hours = false; + if (static_cast(end - str) > 1 && str != end_of_days && isdigit_char(*str)) { + date[0] = static_cast(value); + state = 1; + found_days = true; + } else if ((end - str) > 1 && *str == time_separator && isdigit_char(str[1])) { + date[0] = 0; + date[1] = static_cast(value); + state = 2; + found_hours = true; + str++; /* skip ':' */ + seen_colon = true; + } else { + /* String given as one number; assume HHMMSS format */ + date[0] = 0; + date[1] = static_cast(value / 10000); + date[2] = static_cast(value / 100 % 100); + date[3] = static_cast(value % 100); + state = 4; + goto fractional; + } + + for (;;) { + for (value = 0; str != end && isdigit_char(*str); str++) + value = value * 10L + static_cast(*str - '0'); + date[state++] = value; + if (state == 4 || (end - str) < 2 || *str != time_separator || !isdigit_char(str[1])) + break; + str++; + seen_colon = true; + } + + if (state != 4) { + memset((date + state), 0, sizeof(long) * (4 - state)); + } + 
+fractional: + if ((end - str) >= 2 && *str == '.' && isdigit_char(str[1])) { + int field_length = 5; + str++; + value = static_cast(static_cast(*str - '0')); + while (++str != end && isdigit_char(*str)) { + if (field_length-- > 0) + value = value * 10 + static_cast(static_cast(*str - '0')); + } + if (field_length >= 0) { + if (field_length > 0) + value *= static_cast(log_10_int[field_length]); + } else { + for (; str != end && isdigit_char(*str); str++) { + } + date[4] = static_cast(value); + } + } else if ((end - str) == 1 && *str == '.') { + str++; + date[4] = 0; + } else + date[4] = 0; + + l_time->year = 0; + l_time->month = 0; + l_time->day = 0; + l_time->hour = date[1] + date[0] * 24; + l_time->minute = date[2]; + l_time->second = date[3]; + l_time->second_part = date[4]; + + l_time->time_type = MYSQL_TIMESTAMP_TIME; + l_time->time_zone_displacement = 0; + return; +} + +/* + YYMMDD, YYYYMMDD, YYMMDDHHMMSS, YYYYMMDDHHMMSS + YY-MM-DD, YYYY-MM-DD, YY-MM-DD HH.MM.SS + YYYYMMDDTHHMMSS +*/ +void str_to_datetime(const char *str_arg, std::size_t length, MYSQL_TIME *l_time) +{ + uint field_length = 0; + uint year_length = 0; + uint digits; + uint number_of_fields; + uint date[MAX_DATE_PARTS]; + uint date_len[MAX_DATE_PARTS]; + uint start_loop; + ulong not_zero_date; + bool is_internal_format = false; + const char *pos; + const char *last_field_pos = nullptr; + const char *end = str_arg + length; + bool found_delimiter = false; + bool found_space = false; + bool found_displacement = false; + uint frac_pos; + uint frac_len; + int displacement = 0; + const char *str = str_arg; + + for (; str != end && isspace_char(*str); str++) + ; // 跳过空格 + + if (str == end || !isdigit_char(*str)) + return; + + is_internal_format = false; // internal format表示只有数字没有分隔符 + + for (pos = str; pos != end && (isdigit_char(*pos) || *pos == 'T'); pos++) + ; + + digits = static_cast(pos - str); // 第一个part的数字有多少位 + start_loop = 0; /* Start of scan loop */ + date_len[0] = 0; /* Length of year field 
*/ + + if (pos == end || *pos == '.') { + /* Found date in internal format (only numbers like YYYYMMDD) */ + year_length = (digits == 4 || digits == 8 || digits >= 14) ? 4 : 2; + field_length = year_length; + is_internal_format = true; + } else { + field_length = 4; + } + + not_zero_date = 0; + uint i; + /* + 一个循环代表一个part + */ + for (i = start_loop; i < MAX_DATE_PARTS - 1 && str != end && isdigit_char(*str); i++) { + const char *start = str; + ulong tmp_value = static_cast(*str++ - '0'); + bool scan_until_delim = !is_internal_format && (i != 6); + + while (str != end && isdigit_char(str[0]) && (scan_until_delim || --field_length)) { + tmp_value = tmp_value * 10 + static_cast(static_cast(*str - '0')); + str++; + } + date_len[i] = static_cast(str - start); + date[i] = tmp_value; + not_zero_date |= tmp_value; + + field_length = 2; // 年份之后每个field的长度都为2 + if ((last_field_pos = str) == end) { + i++; + break; + } + if (i == 2 && *str == 'T') { + str++; + continue; + } + if (i == 5) { + if (*str == '.') { + str++; + last_field_pos = str; + field_length = 6; /* 6 digits */ + } else if (isdigit_char(str[0])) { + i++; + break; + } else if (str[0] == '+' || str[0] == '-') { + if (!time_zone_displacement_to_seconds(str, end - str, &displacement)) { + found_displacement = true; + str += end - str; + last_field_pos = str; + } else { + l_time->time_type = MYSQL_TIMESTAMP_NONE; + return; + } + } + continue; + } + if (i == 6 && (str[0] == '+' || str[0] == '-')) { + if (!time_zone_displacement_to_seconds(str, end - str, &displacement)) { + found_displacement = true; + str += end - str; + last_field_pos = str; + } else { + return; + } + } + + bool one_delim_seen = false; + while (str != end && (ispunct_char(*str) || isspace_char(*str))) { + if (isspace_char(*str)) { + found_space = true; + } + str++; + one_delim_seen = true; + found_delimiter = true; + } + if (i == 6) { + i++; + } + last_field_pos = str; + } + + str = last_field_pos; + number_of_fields = i; + + while (i < 
MAX_DATE_PARTS) { + date_len[i] = 0; + date[i++] = 0; + } + + if (!is_internal_format) { + year_length = date_len[0]; + + l_time->year = date[static_cast(0)]; + l_time->month = date[static_cast(1)]; + l_time->day = date[static_cast(2)]; + l_time->hour = date[static_cast(3)]; + l_time->minute = date[static_cast(4)]; + l_time->second = date[static_cast(5)]; + l_time->time_zone_displacement = displacement; + + frac_pos = static_cast(6); + frac_len = date_len[frac_pos]; + if (frac_len < 6) + date[frac_pos] *= static_cast(log_10_int[DATETIME_MAX_DECIMALS - frac_len]); + l_time->second_part = date[frac_pos]; + } else { + l_time->year = date[0]; + l_time->month = date[1]; + l_time->day = date[2]; + l_time->hour = date[3]; + l_time->minute = date[4]; + l_time->second = date[5]; + if (date_len[6] < 6) + date[6] *= static_cast(log_10_int[DATETIME_MAX_DECIMALS - date_len[6]]); + l_time->second_part = date[6]; + l_time->time_zone_displacement = displacement; + } + l_time->neg = false; + + if (year_length == 2 && not_zero_date) + l_time->year += (l_time->year < 70 ? 2000 : 1900); + + l_time->time_type = (number_of_fields <= 3 ? MYSQL_TIMESTAMP_DATE + : (found_displacement ? 
MYSQL_TIMESTAMP_DATETIME_TZ + : MYSQL_TIMESTAMP_DATETIME)); + + if (str != end && (str[0] == '+' || str[0] == '-')) { + l_time->time_type = MYSQL_TIMESTAMP_DATETIME_TZ; + l_time->time_zone_displacement = displacement; + return; + } + return; +} + +void int_to_date(const char *date_arg, std::size_t length, MYSQL_TIME *l_time) +{ + const int date = *reinterpret_cast(date_arg); + + l_time->year = date / 10000; + l_time->month = date % 10000 / 100; + l_time->day = date % 10000 % 100; +} + +void double_to_time(const char *time_arg, std::size_t length, MYSQL_TIME *l_time) +{ + std::string timeStr(time_arg, length); + std::string intPart; + std::string decimalPart; + + int intPartNum, decimalPartNum; + size_t dotPos = timeStr.find('.'); + if (dotPos != std::string::npos) { + intPart = timeStr.substr(0, dotPos); + decimalPart = timeStr.substr(dotPos + 1); + if (!decimalPart.empty()) { + LOG_DEBUG("time info: frac"); + } + intPartNum = std::atoi(intPart.c_str()); + decimalPartNum = std::atoi(decimalPart.c_str()); + } else { + // time is integer + intPartNum = std::abs(std::atoi(timeStr.c_str())); + decimalPartNum = 0; + } + // time is negtive + if (!timeStr.empty() && timeStr[0] == '-') { + l_time->neg = true; + } + + l_time->hour = intPartNum / 10000; + l_time->minute = intPartNum % 10000 / 100; + l_time->second = intPartNum % 10000 % 100; + l_time->second_part = decimalPartNum; + // std::cout << " h " << l_time->hour << " m " << l_time->minute << " s: " << l_time->second << + // std::endl; +} + +void datetime_to_timeval(const MYSQL_TIME *ltime, my_timeval *tm) +{ + // FIXME need to consider time_zone + int64_t not_used = 0; + tm->m_tv_sec = my_system_gmt_sec(*ltime, ¬_used); + tm->m_tv_usec = ltime->second_part; +} + +longlong TIME_to_longlong_packed(const MYSQL_TIME &my_time) +{ + switch (my_time.time_type) { + case MYSQL_TIMESTAMP_DATETIME_TZ: + return -1; // this time type should not be enter in + case MYSQL_TIMESTAMP_DATETIME: + return 
TIME_to_longlong_datetime_packed(my_time); + case MYSQL_TIMESTAMP_TIME: + return TIME_to_longlong_time_packed(my_time); + case MYSQL_TIMESTAMP_DATE: + LOG_ERROR("DATE type will not come here"); + case MYSQL_TIMESTAMP_NONE: + case MYSQL_TIMESTAMP_ERROR: + return 0; + } + return 0; +} diff --git a/binlogconvert/src/utils/rpl_gtid.cpp b/binlogconvert/src/utils/rpl_gtid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..715ca25e69a861eb55ae13b7cf2d536141737791 --- /dev/null +++ b/binlogconvert/src/utils/rpl_gtid.cpp @@ -0,0 +1,262 @@ +/* Copyright (c) 2011, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: sql/rpl_gtid_set.cc +#include "utils/rpl_gtid.h" + +#include +#include +#include // PRId64 + +#include "common/logging.h" +#include "utils/little_endian.h" + +//*******************parse util ************************* +void skip_whitespace(const char *s) +{ + while (s != nullptr && *s != '\0' && std::isspace(*s)) { + ++s; + } +} + +rpl_gno parse_gno(const char **s) +{ + char *endp; + long long ret = strtoll(*s, &endp, 0); + if (ret < 0 || ret >= GNO_END) { + return -1; + } + *s = endp; + return static_cast(ret); +} + +char *longlong10_to_str(int64_t value, char *buffer, int radix) +{ + int64_t absValue = std::abs(value); + int index = 0; + + do { + int digit = absValue % radix; + buffer[index++] = (digit < 10) ? ('0' + digit) : ('a' + digit - 10); + absValue /= radix; + } while (absValue > 0); + + if (value < 0) { + buffer[index++] = '-'; + } + + buffer[index] = '\0'; + std::reverse(buffer, buffer + index); + + return buffer; +} + +int format_gno(char *s, rpl_gno gno) +{ + return static_cast(longlong10_to_str(gno, s, 10) - s); +} + +/************************************************************************** + Gtid methods +**************************************************************************/ + +bool Gtid::is_valid(const char *text) +{ + const char *s = text; + skip_whitespace(s); + if (!rpl_sid::is_valid(s, binary_log::Uuid::TEXT_LENGTH)) { + return false; + } + s += binary_log::Uuid::TEXT_LENGTH; + skip_whitespace(s); + if (*s != ':') { + return false; + } + s++; + skip_whitespace(s); + if (parse_gno(&s) <= 0) { + return false; + } + skip_whitespace(s); + if (*s != 0) { + return false; + } + return true; +} + +int Gtid::to_string(const rpl_sid &sid, char *buf) const +{ + char *s = buf + sid.to_string(buf); + *s = ':'; + s++; + s += 
format_gno(s, gno_); + return (int)(s - buf); +} + +int Gtid::to_string(const Sid_map *sid_map, char *buf) const +{ + int ret; + if (sid_map != nullptr) { + const rpl_sid &sid = sid_map->sidno_to_sid(sidno_); + ret = to_string(sid, buf); + } else { + ret = sprintf(buf, "%d:%" PRId64, sidno_, gno_); + } + return ret; +} + +enum_return_status Gtid::parse(Sid_map *sid_map, const char *text) +{ + rpl_sid sid{}; + const char *s = text; + + skip_whitespace(s); + + // parse sid + if (sid.parse(s, binary_log::Uuid::TEXT_LENGTH) == 0) { + rpl_sidno sidno_var = sid_map->add_sid(sid); + if (sidno_var <= 0) { + return RETURN_STATUS_REPORTED_ERROR; + } + s += binary_log::Uuid::TEXT_LENGTH; + + skip_whitespace(s); + + // parse colon + if (*s == ':') { + s++; + + skip_whitespace(s); + + // parse gno + rpl_gno gno_var = parse_gno(&s); + if (gno_var > 0) { + skip_whitespace(s); + if (*s == '\0') { + sidno_ = sidno_var; + gno_ = gno_var; + return RETURN_STATUS_OK; + } + } + return RETURN_STATUS_REPORTED_ERROR; + } + } + // never reached + return RETURN_STATUS_UNREPORTED_ERROR; +} + +/************************************************************************** + Gtid_specification methods +**************************************************************************/ + +bool Gtid_specification::is_valid(const char *text) +{ + // AUTOMATIC, ANONYMOUS, always return true + return true; +} + +enum_return_status Gtid_specification::parse(Sid_map *sid_map, const char *text) +{ + type_ = ANONYMOUS_GTID; + gtid_.sidno_ = 0; + gtid_.gno_ = 0; + return RETURN_STATUS_OK; +} + +int Gtid_specification::to_string(const rpl_sid *sid, char *buf) const +{ + switch (type_) { + case AUTOMATIC_GTID: + strncpy(buf, "AUTOMATIC", 9); + return 9; + case NOT_YET_DETERMINED_GTID: + strncpy(buf, "NOT_YET_DETERMINED", 18); + return 18; + case ANONYMOUS_GTID: + strncpy(buf, "ANONYMOUS", 9); + return 9; + case UNDEFINED_GTID: + case ASSIGNED_GTID: + return gtid_.to_string(*sid, buf); + case PRE_GENERATE_GTID: + 
strncpy(buf, "PRE_GENERATE_GTID", 17); + return 17; + } + LOG_ERROR("gtid mode is invalid"); + return 0; +} + +int Gtid_specification::to_string(const Sid_map *sid_map, char *buf) const +{ + return to_string(type_ == ASSIGNED_GTID || type_ == UNDEFINED_GTID + ? &sid_map->sidno_to_sid(gtid_.sidno_) + : nullptr, + buf); +} + +rpl_sidno Sid_map::add_sid(const rpl_sid &sid) +{ + rpl_sidno sidno; + auto it = sid_to_sidno_map_.find(sid); + if (it != sid_to_sidno_map_.end()) { + return it->second->sidno_; + } else { + sidno = get_max_sidno() + 1; + if (add_node(sidno, sid) != RETURN_STATUS_OK) { + sidno = -1; + } + } + + return sidno; +} + +enum_return_status Sid_map::add_node(rpl_sidno sidno, const rpl_sid &sid) +{ + Node *node = new Node(); + node->sidno_ = sidno; + node->sid_ = sid; + + sidno_to_sid_map_.emplace_back(node); + sid_to_sidno_map_.emplace(sid, std::move(node)); + + return RETURN_STATUS_OK; +} + +/************************************************************************** + Gtid_set methods +**************************************************************************/ + +size_t Gtid_set::get_encoded_length() const +{ + size_t ret = 8; + return ret; +} + +void Gtid_set::encode(unsigned char *buf) const +{ + // make place for number of sids + uint64_t n_sids = 0; + unsigned char *n_sids_p = buf; + buf += 8; + // store number of sids + int8store(n_sids_p, n_sids); +} diff --git a/binlogconvert/src/utils/uuid.cpp b/binlogconvert/src/utils/uuid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..297745aed0a3417f5be4f722c647895f2a6a9389 --- /dev/null +++ b/binlogconvert/src/utils/uuid.cpp @@ -0,0 +1,150 @@ +/* Copyright (c) 2014, 2022, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. 
+ + This program is also distributed with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have included with MySQL. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +// refer to: libbinlogevents/src/uuid.cpp + +#include "utils/uuid.h" + +/* +const size_t Uuid::TEXT_LENGTH; +const size_t Uuid::BYTE_LENGTH; +const size_t Uuid::BIT_LENGTH; +*/ +namespace binary_log { + +const int Uuid::bytes_per_section[NUMBER_OF_SECTIONS] = {4, 2, 2, 2, 6}; +const int Uuid::hex_to_byte[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +int Uuid::parse(const char *string, size_t len) +{ + return parse(string, len, bytes); +} + +int Uuid::parse(const char *in_string, size_t len, const unsigned char *out_str) +{ + const unsigned char **p_out_str = out_str ? &out_str : nullptr; + + switch (len) { + // UUID without dashes. ex 12345678123456781234567812345678 + case TEXT_LENGTH - 4: + if (read_section((TEXT_LENGTH - 4) / 2, &in_string, p_out_str)) { + return 1; + } + break; + // UUID with braces ex {12345678-1234-5678-1234-567812345678} + case TEXT_LENGTH + 2: + if (*in_string != '{' || in_string[TEXT_LENGTH + 1] != '}') { + return 1; + } + in_string++; + [[fallthrough]]; + // standard UUID ex 12345678-1234-5678-1234-567812345678 + case TEXT_LENGTH: + for (int i = 0; i < NUMBER_OF_SECTIONS - 1; i++) { + if (read_section(bytes_per_section[i], &in_string, p_out_str)) { + return 1; + } + if (*in_string == '-') { + in_string++; + } else { + return 1; + } + } + if (read_section(bytes_per_section[NUMBER_OF_SECTIONS - 1], &in_string, p_out_str)) { + return 1; + } + break; + default: + return 1; + } + return 0; +} + +bool Uuid::read_section(int section_len, const char **section_str, + const unsigned char **out_binary_str) +{ + const unsigned char **section_string = reinterpret_cast(section_str); + for (int j = 0; j < section_len; j++) { + int hi = hex_to_byte[**section_string]; + if (hi == -1) { + return true; + } + (*section_string)++; + int lo = hex_to_byte[**section_string]; + if (lo == -1) { + return true; + } + (*section_string)++; + if (out_binary_str) { + unsigned char *u = const_cast(*out_binary_str); + *u = ((hi << 4) + lo); + (*out_binary_str)++; + } + } + return false; +} + +bool Uuid::is_valid(const char *s, size_t len) +{ + return parse(s, len, nullptr) == 0; 
+} + +size_t Uuid::to_string(const unsigned char *bytes_arg, char *buf) +{ + static const char byte_to_hex[] = "0123456789abcdef"; + const unsigned char *u = bytes_arg; + for (int i = 0; i < NUMBER_OF_SECTIONS; i++) { + if (i > 0) { + *buf = '-'; + buf++; + } + for (int j = 0; j < bytes_per_section[i]; j++) { + int byte = *u; + *buf = byte_to_hex[byte >> 4]; + buf++; + *buf = byte_to_hex[byte & 0xf]; + buf++; + u++; + } + } + *buf = '\0'; + return TEXT_LENGTH; +} + +size_t Uuid::to_string(char *buf) const +{ + return to_string(bytes, buf); +} + +} // namespace binary_log diff --git a/binlogconvert/test/CMakeLists.txt b/binlogconvert/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..356821b71133cf9d1d72407eeb16944eafc7f75a --- /dev/null +++ b/binlogconvert/test/CMakeLists.txt @@ -0,0 +1,26 @@ +include(GoogleTest) + +#Copy data file to the build directory +file(COPY data DESTINATION ${CMAKE_BINARY_DIR}/test) + +file(GLOB_RECURSE SRC_TEST ./*.cpp) + +foreach(F ${SRC_TEST}) # unit tests + file(RELATIVE_PATH R ${CMAKE_CURRENT_SOURCE_DIR} ${F}) + string(REPLACE ".cpp" "" R ${R}) + message(STATUS "+ " ${F}) + add_executable(${R} ${F}) + + # Link with gtest and loft + target_link_libraries(${R} gtest gtest_main sql2bl stdc++fs) + + set_target_properties(${R} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test + COMMAND ${R} + ) + + # Enable CTest for unit tests + gtest_discover_tests(${R}) +endforeach() + diff --git a/binlogconvert/test/data b/binlogconvert/test/data new file mode 100644 index 0000000000000000000000000000000000000000..88efbe348a77d477b05160b51443718575c9e653 Binary files /dev/null and b/binlogconvert/test/data differ diff --git a/binlogconvert/test/event_test.cpp b/binlogconvert/test/event_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f046ab90356ed9f0ad08a6505c7e8e152c2c0517 --- /dev/null +++ b/binlogconvert/test/event_test.cpp @@ -0,0 +1,363 @@ +// +// Created by Coonger 
on 2024/10/17. +// + +#include + +#include "events/control_events.h" +#include "events/rows_event.h" +#include "events/statement_events.h" + +#include "common/logging.h" +#include "common/macros.h" + +#include "binlog.h" +#include "log_file.h" +#include "utils/table_id.h" + +using namespace loft; + +/** + * @brief 打开一个 binlog 文件, 如果是一个新的,则会自动写入 magic number 和 fde 事件 + * 测试 next_file(),并且可以看到 ON.000001 这个文件的末尾有写入 rotate event + */ +TEST(CONTROL_EVENT_FORMAT_TEST, OPEN_NEXT_FILE_ROTATE) +{ + auto logFileManager = std::make_unique(); + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + // 2. 重新打开一个 新的 binlog 文件,准备写 + auto fileWriter = logFileManager->get_file_writer(); + logFileManager->next_file(*fileWriter); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 3); + for (auto &file : files) { + std::cout << file.second.first << std::endl; + } + + fileWriter->close(); +} + +TEST(CONTROL_EVENT_FORMAT_TEST, FORMAT_DESCRIPTION_EVENT) +{ + const char *test_file_name = "test_magic_fde"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + auto fde = std::make_unique(4, "8.0.32-debug"); + binlog->write_event_to_binlog(fde.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 GTID 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, GTID_EVENT) +{ + const char *test_file_name = "test_gtid"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + 
LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + long long int last_committed_arg = 30; + long long int sequence_number_arg = 31; + bool may_have_sbr_stmts_arg = true; + unsigned long long int original_commit_timestamp_arg = 1722493959000068; + unsigned long long int immediate_commit_timestamp_arg = 1722493961117679; + + auto ge = std::make_unique(last_committed_arg, sequence_number_arg, may_have_sbr_stmts_arg, + original_commit_timestamp_arg, immediate_commit_timestamp_arg, + ORIGINAL_SERVER_VERSION, IMMEDIATE_SERVER_VERSION); + binlog->write_event_to_binlog(ge.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Query 事件 + */ +TEST(STATEMENT_EVENT_FORMAT_TEST, QUERY_EVENT) +{ + const char *test_file_name = "test_query"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + const char *query_arg = "create table t1 (id int)"; + const char *catalog_arg = nullptr; + const char *db_arg = "t1"; // 假设没有的话,mysqlbinlog默认理解成 + // mysql,所以会 use 'mysql' + catalog_arg = db_arg; + uint64_t ddl_xid_arg = 31; + size_t query_length = strlen(query_arg); + unsigned long thread_id_arg = 10000; // 随意 + /// 这三个参数,暂时没用到 + unsigned long long sql_mode_arg = 0; // 随意 + unsigned long auto_increment_increment_arg = 0; // 随意 + unsigned long auto_increment_offset_arg = 0; // 随意 + /// + unsigned int number = 0; // 时区,0 表示 en-US + unsigned long long table_map_for_update_arg = 0; // 只涉及单表 update,所以填 0 + int errcode = 0; // 默认不出错 + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto qe = std::make_unique(query_arg, catalog_arg, db_arg, ddl_xid_arg, query_length, thread_id_arg, + errcode, immediate_commit_timestamp_arg); + + binlog->write_event_to_binlog(qe.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Table_map 事件 + 
*/ +TEST(ROWS_EVENT_FORMAT_TEST, TABLE_MAP_EVENT) +{ + const char *test_file_name = "test_table_map"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + // 1. 查询 table_name 是否访问过, 如果没有, 就创建一个 Table_id 对象 + Table_id tid(13); + // 2. 读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field("a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = std::make_unique( + tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 insert sql 的 write row 事件 + * insert t1 values(1); 向 t1 表中插入一行,有 1 个 column,int 类型 + */ +TEST(ROWS_EVENT_FORMAT_TEST, WRITE_EVENT) +{ + const char *test_file_name = "test_insert_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + // TODO 查询 table_name 是否访问过, 如果没有, 就创建一个 Table_id 对象 + Table_id tid(13); + // 2. 
读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field("a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = std::make_unique( + tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto insertRow = std::make_unique(tid, colCnt, 1, WRITE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int data1 = 1; + std::vector rows{1}; + std::vector rows_null{0}; + insertRow->set_rows_after(std::move(rows)); + insertRow->set_null_after(std::move(rows_null)); + insertRow->write_data_after(reinterpret_cast(&data1), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + binlog->write_event_to_binlog(insertRow.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 update sql 的 write row 事件 + * update t1 set a1 = 10 where a1 = 1; 向 t1 表中更新一行,有一个 column,int 类型 + */ +TEST(ROWS_EVENT_FORMAT_TEST, UPDATE_EVENT) +{ + const char *test_file_name = "test_update_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + Table_id tid(13); + // 2. 
读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field("a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = std::make_unique( + tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto updateRow = std::make_unique(tid, colCnt, 1, UPDATE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int newData1 = 10; + std::vector rows_after{1}; + std::vector rows_null_after{0}; + updateRow->set_rows_after(std::move(rows_after)); + updateRow->set_null_after(std::move(rows_null_after)); + updateRow->write_data_after(reinterpret_cast(&newData1), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + int conditionData = 1; + std::vector rows_before{1}; + std::vector rows_null_before{0}; + updateRow->set_rows_before(std::move(rows_before)); + updateRow->set_null_before(std::move(rows_null_before)); + updateRow->write_data_before(reinterpret_cast(&conditionData), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + binlog->write_event_to_binlog(updateRow.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 delete sql 的 write row 事件 + * delete from t1 where a1 = 10 + */ +TEST(ROWS_EVENT_FORMAT_TEST, DELETE_EVENT) +{ + const char *test_file_name = "test_delete_row"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + Table_id tid(13); + // 2. 
读 field's size() + uint64 colCnt = 1; + const char *dbName = "t1"; + const char *tblName = "t1"; + + std::vector field_vec; + auto field_obj = mysql::make_field("a1", 0, false, false, 0, MYSQL_TYPE_LONG, 0, 0); + field_vec.emplace_back(field_obj); + + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto table_map_event = std::make_unique( + tid, colCnt, dbName, strlen(dbName), tblName, strlen(tblName), field_vec, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(table_map_event.get()); + + auto deleteRow = std::make_unique(tid, colCnt, 1, DELETE_ROWS_EVENT, + immediate_commit_timestamp_arg); // 初始化 一个 rows_event 对象 + + int conditionData = 10; + std::vector rows_before{1}; + std::vector rows_null_before{0}; + deleteRow->set_rows_before(std::move(rows_before)); + deleteRow->set_null_before(std::move(rows_null_before)); + deleteRow->write_data_before(reinterpret_cast(&conditionData), MYSQL_TYPE_LONG, 4, 0, 0, 0); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Xid 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, XID_EVENT) +{ + const char *test_file_name = "test_xid"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to open binlog file"); + + uint64 txSeq = 35; + uint64 immediate_commit_timestamp_arg = 1722493961117679; + + auto xe = std::make_unique(txSeq, immediate_commit_timestamp_arg); + binlog->write_event_to_binlog(xe.get()); + + binlog->close(); +} + +/** + * @brief 测试 binlog 写入 Rotate 事件 + */ +TEST(CONTROL_EVENT_FORMAT_TEST, ROTATE_EVENT) +{ + const char *test_file_name = "test_rotate"; + uint64_t test_file_size = 1024; + + RC ret; + auto binlog = std::make_unique(test_file_name, test_file_size, ret); + LOFT_VERIFY(ret != RC::SUCCESS, "Failed to create binlog file."); + + ret = binlog->open(); + LOFT_VERIFY(ret != RC::SUCCESS, 
"Failed to open binlog file"); + + std::string next_binlog_file_name = "ON.000021"; + LOG_INFO("next binlog file_name len: %zu", next_binlog_file_name.length()); + + auto re = std::make_unique(next_binlog_file_name.c_str(), next_binlog_file_name.length(), + Rotate_event::DUP_NAME, 4); + binlog->write_event_to_binlog(re.get()); + + binlog->close(); +} diff --git a/binlogconvert/test/fbs_test.cpp b/binlogconvert/test/fbs_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8d80d0c9a8791b5fbbe7c0adb7c217ad29062fdb --- /dev/null +++ b/binlogconvert/test/fbs_test.cpp @@ -0,0 +1,485 @@ +#include +#include + +#include "format/ddl_generated.h" + +#include "common/logging.h" + +#include "binlog.h" +#include "buffer_reader.h" +#include "log_file.h" +#include "transform_manager.h" +#include "utils/base64.h" + +using namespace loft; // flatbuffer namespace + +/** + * @brief 1. 测试 RedoLogFileReader 的 readFromFile 方法 & BufferReader 的 read 方法 + * 2. 验证读 DDL sql,create db 字段格式能否正确解析, 缺少 dbName 和 table 字段,共 11 个字段 + */ +TEST(DDL_TEST, CREATE_DB) +{ + std::string file_name = "./data"; + auto reader = std::make_unique(); + auto [data, fileSize] = reader->readFromFile(file_name); + auto bufferReader = std::make_unique(data.get(), fileSize); + + auto sql_len = bufferReader->read(); + EXPECT_EQ(sql_len, 248); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + auto ddl = GetDDL(buf.data()); + + auto ckp = ddl->check_point(); + EXPECT_STREQ(ckp->c_str(), "31-1-54348795023361"); + + auto dbName = ddl->db_name(); + EXPECT_TRUE(dbName == nullptr); + + auto ddlSql = ddl->ddl_sql(); + EXPECT_STREQ(ddlSql->c_str(), "create database t1"); + + auto ddlType = ddl->ddl_type(); + EXPECT_STREQ(ddlType->c_str(), "CREATE TABLE"); + + auto lastCommit = ddl->last_commit(); + EXPECT_EQ(lastCommit, 30); + + auto msgTime = ddl->msg_time(); + EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000054"); + + auto opType = ddl->op_type(); + 
EXPECT_STREQ(opType->c_str(), "DDL"); + + auto scn = ddl->scn(); + EXPECT_EQ(scn, 54348795023361); + + auto seq = ddl->seq(); + EXPECT_EQ(seq, 1); + + auto table = ddl->table_(); + EXPECT_TRUE(table == nullptr); + + auto txSeq = ddl->tx_seq(); + EXPECT_EQ(txSeq, 31); + + auto txTime = ddl->tx_time(); + EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000068"); +} + +/** + * @brief 测试 读 DDL sql,create table 字段格式能否正确解析,共 13 个完整字段都有数据 + */ +TEST(DDL_TEST, CREATE_TABLE) +{ + std::string file_name = "./data"; + auto reader = std::make_unique(); + auto [data, fileSize] = reader->readFromFile(file_name); + + auto bufferReader = std::make_unique(data.get(), fileSize); + + uint32 sql_len; + int SKIP_CNT = 1; + for (int k = 0; k < SKIP_CNT; k++) { + sql_len = bufferReader->read(); + bufferReader->forward(sql_len); + } + + sql_len = bufferReader->read(); + EXPECT_EQ(sql_len, 744); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + auto ddl = GetDDL(buf.data()); + + auto ckp = ddl->check_point(); + EXPECT_STREQ(ckp->c_str(), "33-1-54349172944897"); + + auto dbName = ddl->db_name(); + EXPECT_STREQ(dbName->c_str(), "t1"); + + auto ddlSql = ddl->ddl_sql(); + EXPECT_STREQ(ddlSql->c_str(), + "create table t1(a1 int primary key, a2 char(20),a3 bit(23), a4 smallint, a5 smallint " + "unsigned, a6 mediumint, a7 mediumint unsigned, a8 int unsigned, a9 bigint, a10 bigint " + "unsigned, a11 float(10,5), a12 float(10,5) unsigned, a13 double(20,10), a14 double(20,10) " + "unsigned, a15 decimal(10,5), a16 decimal(10,5) unsigned, a17 year(4), a18 " + "enum('aa','bb','cc'), a19 set('dd','ee','ff'), a20 tinytext, a21 text, a22 mediumtext, " + "a23 longtext, a24 tinyblob, a25 blob, a26 mediumblob, a27 longblob)"); + + auto ddlType = ddl->ddl_type(); + EXPECT_STREQ(ddlType->c_str(), "CREATE TABLE"); + + auto lastCommit = ddl->last_commit(); + EXPECT_EQ(lastCommit, 32); + + auto lsn = ddl->lsn(); + EXPECT_EQ(lsn, 279711); + + auto msgTime = ddl->msg_time(); + 
EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000117"); + + auto opType = ddl->op_type(); + EXPECT_STREQ(opType->c_str(), "DDL"); + + auto scn = ddl->scn(); + EXPECT_EQ(scn, 54349172944897); + + auto seq = ddl->seq(); + EXPECT_EQ(seq, 1); + + auto table = ddl->table_(); + EXPECT_STREQ(table->c_str(), "temp"); + + auto txSeq = ddl->tx_seq(); + EXPECT_EQ(txSeq, 33); + + auto txTime = ddl->tx_time(); + EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000160"); +} + +/** + * @brief 测试 读 DDL sql,drop table 字段格式能否正确解析,共 13 个完整字段都有数据 + */ +TEST(DDL_TEST, DROP_TABLE) +{ + std::string file_name = "./data"; + auto reader = std::make_unique(); + auto [data, fileSize] = reader->readFromFile(file_name); + auto bufferReader = std::make_unique(data.get(), fileSize); + // 跳过前 8 条 sql + uint32 sql_len; + int SKIP_CNT = 8; + for (int k = 0; k < SKIP_CNT; k++) { + sql_len = bufferReader->read(); + bufferReader->forward(sql_len); + } + + sql_len = bufferReader->read(); + EXPECT_EQ(sql_len, 264); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + auto ddl = GetDDL(buf.data()); + + auto ckp = ddl->check_point(); + EXPECT_STREQ(ckp->c_str(), "43-1-54350345428993"); + + auto dbName = ddl->db_name(); + EXPECT_STREQ(dbName->c_str(), "t1"); + + auto ddlSql = ddl->ddl_sql(); + EXPECT_STREQ(ddlSql->c_str(), "drop table t1"); + + auto ddlType = ddl->ddl_type(); + EXPECT_STREQ(ddlType->c_str(), "DROP TABLE"); + + auto lastCommit = ddl->last_commit(); + EXPECT_EQ(lastCommit, 42); + + auto lsn = ddl->lsn(); + EXPECT_EQ(lsn, 280191); + + auto msgTime = ddl->msg_time(); + EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000156"); + + auto opType = ddl->op_type(); + EXPECT_STREQ(opType->c_str(), "DDL"); + + auto scn = ddl->scn(); + EXPECT_EQ(scn, 54350345428993); + + auto seq = ddl->seq(); + EXPECT_EQ(seq, 1); + + auto table = ddl->table_(); + EXPECT_STREQ(table->c_str(), "temp"); + + auto txSeq = ddl->tx_seq(); + EXPECT_EQ(txSeq, 43); + + auto txTime = 
ddl->tx_time(); + EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000446"); +} + +/** + * @brief 测试 读 DDL sql,drop table 字段格式能否正确解析,缺少 dbName, ddlType, table, 共 10 + * 个字段 + */ +TEST(DDL_TEST, DROP_DB) +{ + std::string file_name = "./data"; + auto reader = std::make_unique(); + auto [data, fileSize] = reader->readFromFile(file_name); + auto bufferReader = std::make_unique(data.get(), fileSize); + // 跳过前 9 条 sql + uint32 sql_len; + int SKIP_CNT = 9; + for (int k = 0; k < SKIP_CNT; k++) { + sql_len = bufferReader->read(); + bufferReader->forward(sql_len); + } + + sql_len = bufferReader->read(); + EXPECT_EQ(sql_len, 224); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + auto ddl = GetDDL(buf.data()); + + auto ckp = ddl->check_point(); + EXPECT_STREQ(ckp->c_str(), "46-1-54350647873537"); + + auto dbName = ddl->db_name(); + EXPECT_TRUE(dbName == nullptr); + + auto ddlSql = ddl->ddl_sql(); + EXPECT_STREQ(ddlSql->c_str(), "drop database t1"); + + auto ddlType = ddl->ddl_type(); + EXPECT_TRUE(ddlType == nullptr); + + auto lastCommit = ddl->last_commit(); + EXPECT_EQ(lastCommit, 45); + + auto lsn = ddl->lsn(); + EXPECT_EQ(lsn, 281581); + + auto msgTime = ddl->msg_time(); + EXPECT_STREQ(msgTime->c_str(), "2024-08-01 14:32:41.000157"); + + auto opType = ddl->op_type(); + EXPECT_STREQ(opType->c_str(), "DDL"); + + auto scn = ddl->scn(); + EXPECT_EQ(scn, 54350647873537); + + auto seq = ddl->seq(); + EXPECT_EQ(seq, 1); + + auto table = ddl->table_(); + EXPECT_TRUE(table == nullptr); + + auto txSeq = ddl->tx_seq(); + EXPECT_EQ(txSeq, 46); + + auto txTime = ddl->tx_time(); + EXPECT_STREQ(txTime->c_str(), "2024-08-01 14:32:39.000520"); +} + +/** + * @brief 验证读 DML insert2 sql,字段格式能否正确解析 [newData] 缺少 a12 a19 a20 + * update / delete sql 的逻辑一致,其中insert2 最具有代表性(keys 和 newData + * 字段解析结构是相同的,都是 kvPairs,insert2的newData里有null类型) 主要是验证 [fields]:嵌套 + * FieldMeta 和 [newData]: value有long, double string, null四个类型 + */ +TEST(DML_TEST, INSERT2) +{ + // 1. 
读数据到 buffer 中 + std::string filename = "./data"; + auto logFileManager = std::make_unique(); + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + auto fileReader = logFileManager->get_file_reader(); + fileReader->open(filename.c_str()); + auto [data, fileSize] = fileReader->readFromFile(filename); + auto bufferReader = std::make_unique(data.get(), fileSize); + + // 跳过前 3 条 + int SKIP_CNT = 3; + for (int k = 0; k < SKIP_CNT; k++) { + auto sql_len = bufferReader->read(); + bufferReader->forward(sql_len); + } + + auto sql_len = bufferReader->read(); + EXPECT_EQ(sql_len, 3208); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + const DML *dml = GetDML(buf.data()); + // ************* 填数据 begin ************************ + auto ckp = dml->check_point(); + EXPECT_STREQ(ckp->c_str(), "38-1-54349495054337"); + + auto dbName = dml->db_name(); + EXPECT_EQ(std::strcmp(dbName->c_str(), "t1"), 0); + + auto dn = dml->dn(); + EXPECT_EQ(dn, 0); + + auto fields = dml->fields(); + EXPECT_EQ(fields->size(), 27); + // ************* check 第一个 fields + // [字段名,fieldmeta(整数,bool,字符串)]************************ + auto field1 = fields->Get(0); + auto fieldMeta = field1->meta(); + EXPECT_STREQ(field1->name()->c_str(), "a1"); + EXPECT_EQ(fieldMeta->length(), 0); + EXPECT_EQ(fieldMeta->is_unsigned(), false); + EXPECT_EQ(fieldMeta->nullable(), false); + EXPECT_STREQ(fieldMeta->data_type()->c_str(), "INT"); + EXPECT_EQ(fieldMeta->precision(), 0); + + // insert 没有 keys 要判断一下 + auto keys = dml->keys(); + EXPECT_TRUE(keys == nullptr); + + auto lastCommit = dml->last_commit(); + EXPECT_EQ(lastCommit, 33); + + auto lsn = dml->lsn(); + EXPECT_EQ(lsn, 279792); + + auto immediateCommitTs = dml->msg_time(); + EXPECT_STREQ(immediateCommitTs->c_str(), 
"2024-08-01 14:32:41.000145"); + // ************* check newData ************************ + auto newData = dml->new_data(); + EXPECT_EQ(newData->size(), + 27); // 注意,这里还是 27 个,只是 null 数值的没有显示,但在二进制内容中还占位 + + // ************* newData[a11] 是 double 类型 ************************ + auto newData11 = newData->Get(0); + EXPECT_STREQ(newData11->key()->c_str(), "a11"); + EXPECT_DOUBLE_EQ(newData11->value_as_DoubleVal()->value(), 3.402820110321045); + + // ************* newData[a10] 是 long 类型 ************************ + auto newData10 = newData->Get(1); + EXPECT_STREQ(newData10->key()->c_str(), "a10"); + EXPECT_EQ(newData10->value_as_LongVal()->value(), -1); + + // ************* newData[a15] 是 string 类型,是 decimal 的字符串表示 ******** + auto newData15 = newData->Get(4); + EXPECT_STREQ(newData15->key()->c_str(), "a15"); + EXPECT_STREQ(newData15->value_as_StringVal()->value()->c_str(), "3.40282"); + + // ************* newData[a2] 是 string 类型,是 mysql 字符类型的 base64 加密表示, 还要 + // base64明文编码出来*** + auto newData2 = newData->Get(19); + EXPECT_STREQ(newData2->key()->c_str(), "a2"); + const char *value = newData2->value_as_StringVal()->value()->c_str(); + + char *dst = (char *)malloc(base64_needed_decoded_length(strlen(value))); + int64_t dst_len = base64_decode(value, strlen(value), (void *)dst, nullptr, 0); + EXPECT_STREQ(dst, "a"); + EXPECT_EQ(dst_len, 1); + + // ************* newData[a12] 是 null 类型 ************************ + auto newData12 = newData->Get(3); + EXPECT_STREQ(newData12->key()->c_str(), "a12"); + EXPECT_TRUE(newData12->value() == nullptr); + + auto opType = dml->op_type(); + EXPECT_STREQ(opType->c_str(), "I"); + + auto scn = dml->scn(); + EXPECT_EQ(scn, 54349495054337); + + auto table = dml->table_(); + EXPECT_STREQ(table->c_str(), "t1"); + + auto seqNo = dml->tx_seq(); + EXPECT_EQ(seqNo, 38); + + auto originalCommitTs = dml->tx_time(); + EXPECT_STREQ(originalCommitTs->c_str(), "2024-08-01 14:32:39.000238"); + + free(dst); +} + +void process_binlog_file(const std::string 
&filename, bool isDDL, int skip_count, int epoch_count) +{ + // 1. 新建一个 binlog 文件,开启写功能 + auto logFileManager = std::make_unique(); + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + // 打开并读取 binlog 文件 + auto fileReader = logFileManager->get_file_reader(); + fileReader->open(filename.c_str()); + auto [data, fileSize] = fileReader->readFromFile(filename); + auto bufferReader = std::make_unique(data.get(), fileSize); + + // 2. 打开新的一个 binlog 文件,准备写 + auto fileWriter = logFileManager->get_file_writer(); + logFileManager->create_file(*fileWriter); + + // 跳过前 N 条 + uint32 sql_len; + for (int k = 0; k < skip_count; k++) { + sql_len = bufferReader->read(); + bufferReader->forward(sql_len); + } + + // 读指定数量的 SQL 语句并处理 + for (int k = 0; k < epoch_count; k++) { + sql_len = bufferReader->read(); + + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + + logFileManager->ConvertFlatBufferToBinlog(buf.get(), sql_len, isDDL); + } + + // 3. 
关闭 binlog 文件流[logFileManager 析构自动关闭] +} + +/** + * @brief 连续转换 2 条 DDL sql: create db + create table,并验证是否能回放成功 + */ +TEST(SQL_TEST, DDL_CREATE_DB_TABLE) +{ + std::string filename = "./data"; + process_binlog_file(filename, true, 0, 2); // 0条跳过,2条处理 +} + +/** + * @brief 连续转换 3 条 DML insert sql,并验证是否能回放成功 + */ +TEST(SQL_TEST, DML_INSERT) +{ + std::string filename = "./data"; + process_binlog_file(filename, false, 2, 3); // 2条跳过,3条处理 +} + +/** + * @brief 连续转换 2 条 DML update sql,并验证是否能回放成功 + */ +TEST(SQL_TEST, DML_UPDATE) +{ + std::string filename = "./data"; + process_binlog_file(filename, false, 5, 2); // 5条跳过,2条处理 +} + +/** + * @brief 转换 1 条 DML delete sql,并验证是否能回放成功 + */ +TEST(SQL_TEST, DML_DELETE) +{ + std::string filename = "./data"; + process_binlog_file(filename, false, 7, 1); // 7条跳过,1条处理 +} + +/** + * @brief 连续转换 2 条 DDL sql: drop table + drop db,并验证是否能回放成功 + */ +TEST(SQL_TEST, DDL_DROP_DB_TABLE) +{ + std::string filename = "./data"; + process_binlog_file(filename, true, 8, 2); // 8条跳过,2条处理 +} + +/** + * @brief 测试带 json 类型的 DML insert sql,并验证是否能回放成功 + */ +TEST(SQL_TEST, DML_INSERT_JSON) +{ + std::string filename = "./data2"; + process_binlog_file(filename, false, 8, 1); // 8条跳过,1条处理 +} diff --git a/binlogconvert/test/log_file_test.cpp b/binlogconvert/test/log_file_test.cpp new file mode 100644 index 0000000000000000000000000000000000000000..03feed5ffff52cc41dab37b3e886f94ab6ee5011 --- /dev/null +++ b/binlogconvert/test/log_file_test.cpp @@ -0,0 +1,112 @@ +#include "buffer_reader.h" +#include "log_file.h" +#include +#include + +/** + * @brief 验证 接口一 SetBinlogPath() 接口是否正确设置 + */ +TEST(LOG_FILE_TEST, DISABLED_INIT_TEST) +{ + auto logFileManager = std::make_unique(); + + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + RC ret = + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 100, 3600); + EXPECT_EQ(ret, RC::SUCCESS); + + 
EXPECT_STREQ(logFileManager->get_directory(), "/home/yincong/collectBin/"); + EXPECT_STREQ(logFileManager->get_file_prefix(), "ON"); + EXPECT_EQ(logFileManager->get_file_max_size(), 20971520); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 2); + for (auto &file : files) { + std::cout << file.second.first << std::endl; + } +} + +/** + * @brief 统计 directory 目录下,有多少个 binlog 文件 + */ +TEST(LOG_FILE_TEST, LIST_FILE_TEST) +{ + + auto logFileManager = std::make_unique(); + + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 100, 1); + + auto files = logFileManager->get_log_files(); + + EXPECT_EQ(files.size(), 2); + for (auto &file : files) { + std::cout << file.second.first << std::endl; + } +} + +/** + * @brief API2 / API3 接口测试 + */ +TEST(LOG_FILE_TEST, DATA1_TEST) +{ + // 拼接上 data 文件名 + std::string filename = "/home/yincong/binlogconvert/loft/test/data1"; + // 1. 
创建一个 LogFileManager 对象,获得 3 个必要对象 + auto logFileManager = std::make_unique(); + + // DEFAULT_BINLOG_FILE_DIR 的字节数组 + char byteArray[] = "\x2f\x68\x6f\x6d\x65\x2f\x79\x69\x6e\x63\x6f\x6e\x67\x2f\x63\x6f\x6c\x6c" + "\x65\x63\x74\x42\x69\x6e\x2f"; + logFileManager->SetBinlogPath(byteArray, 25, DEFAULT_BINLOG_FILE_SIZE, 4, DEFAULT_BINLOG_FILE_SIZE * 10, 5); + + auto fileReader = logFileManager->get_file_reader(); + // reader 的成员变量等到 open 之后再初始化 + + auto readFileStartTime = std::chrono::high_resolution_clock::now(); // 记录开始时间 + + fileReader->open(filename.c_str()); + auto [data, fileSize] = fileReader->readFromFile(filename); + auto bufferReader = std::make_unique(data.get(), fileSize); + + auto readFileEndTime = std::chrono::high_resolution_clock::now(); // 记录文件读取结束时间 + auto duration = std::chrono::duration_cast(readFileEndTime - readFileStartTime).count(); + LOG_DEBUG("read file time: %ld ms", duration); + + std::vector futures; + + // 处理DDL + int DDLEPOCH = 3; + for (int k = 0; k < DDLEPOCH; k++) { + auto sql_len = bufferReader->read(); + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + RC rc = logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, true).get(); + EXPECT_EQ(rc, RC::SUCCESS); + } + + bufferReader->forward(bufferReader->read()); + + // 处理DML + int DMLEPOCH = 703435; + for (int k = 0; k < DMLEPOCH; k++) { + auto sql_len = bufferReader->read(); + std::vector buf(sql_len); + bufferReader->memcpy(buf.data(), sql_len); + RC rc = logFileManager->ConvertFlatBufferToBinlog(std::move(buf.data()), sql_len, false).get(); + EXPECT_EQ(rc, RC::SUCCESS); + } + + // 中途查询进度 + LOG_DEBUG("test show process......"); + logFileManager->log_progress(); + + long scn = 0; + long seq = 0; + char *ckp = nullptr; + logFileManager->GetLastScnAndSeq(&scn, &seq, &ckp); + std::cout << " ckp: " << ckp << std::endl; +}