From 4ebffe409c4d11f3495355aa0e229d6d966419e4 Mon Sep 17 00:00:00 2001
From: caojiazhi <352139040@qq.com>
Date: Tue, 24 Dec 2024 17:46:45 +0800
Subject: [PATCH 1/6] Support parquet table write other files

---
 .../cpp/src/CMakeLists.txt                    |   3 +
 .../src/jni/ParquetColumnarBatchJniWriter.cpp | 182 ++++++++++
 .../src/jni/ParquetColumnarBatchJniWriter.h   |  57 +++
 .../omniop-native-reader/java/pom.xml         |   1 +
 .../jni/ParquetColumnarBatchWriter.java       | 336 ++++++++++++++++++
 .../boostkit/spark/ColumnarPlugin.scala       |   2 +
 .../OmniFileFormatDataWriter.scala            |  31 +-
 .../parquet/OmniParquetOutputWriter.scala     |  76 ++++
 .../parquet/OmniParquetFileFormat.scala       |  97 ++++-
 9 files changed, 776 insertions(+), 9 deletions(-)
 create mode 100644 omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
 create mode 100644 omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
 create mode 100644 omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
 create mode 100644 omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetOutputWriter.scala

diff --git a/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt b/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt
index 346db130b..7061e7da7 100644
--- a/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt
+++ b/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt
@@ -8,11 +8,13 @@ set (SOURCE_FILES
     jni/OrcColumnarBatchJniReader.cpp
     jni/jni_common.cpp
     jni/ParquetColumnarBatchJniReader.cpp
+    jni/ParquetColumnarBatchJniWriter.cpp
     parquet/ParquetReader.cpp
     parquet/ParquetColumnReader.cpp
     parquet/ParquetTypedRecordReader.cpp
     parquet/ParquetDecoder.cpp
     parquet/ParquetExpression.cpp
+    parquet/ParquetWriter.cpp
    common/UriInfo.cc
    orcfile/OrcFileOverride.cc
    orcfile/OrcHdfsFileOverride.cc
@@ -51,6 +53,7 @@ find_package(ArrowDataset REQUIRED)
 target_include_directories(${PROJ_TARGET} PUBLIC $ENV{JAVA_HOME}/include)
 target_include_directories(${PROJ_TARGET} PUBLIC $ENV{JAVA_HOME}/include/linux)
 target_include_directories(${PROJ_TARGET} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
+target_include_directories(${PROJ_TARGET} PUBLIC $ENV{OMNI_HOME}/lib/include)

 target_link_libraries (${PROJ_TARGET} PUBLIC
     Arrow::arrow_shared
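Note: the JNI entry points added below bind to a Java wrapper class in the omniop-native-reader java module, which is not part of this diff. Judging only from the exported symbol names and the call sites in ParquetColumnarBatchWriter.java further down, the binding is expected to have roughly this shape (a sketch under that assumption, not the actual file):

    package com.huawei.boostkit.write.jni;

    import org.json.JSONObject;

    public class ParquetColumnarBatchJniWriter {
        // Returns the address of the native ParquetWriter instance.
        public native long initializeWriter(JSONObject writerOptions);
        // Registers field names/types/nullability plus decimal precision and scale.
        public native void initializeSchema(long writer, String[] fieldNames, int[] fieldTypes,
                                            boolean[] nullables, int[][] decimalParams);
        // Writes a whole batch, or a [startPos, endPos) slice of one.
        public native void write(long writer, long[] vecNativeIds, int[] omniTypes,
                                 boolean[] dataColumnsIds, int numRows);
        public native void splitWrite(long writer, long[] vecNativeIds, int[] omniTypes,
                                      boolean[] dataColumnsIds, long startPos, long endPos);
    }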
diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
new file mode 100644
index 000000000..5c822e8b2
--- /dev/null
+++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
@@ -0,0 +1,182 @@
+/**
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParquetColumnarBatchJniWriter.h"
+#include "jni_common.h"
+#include "parquet/ParquetWriter.h"
+#include "common/UriInfo.h"
+#include "arrow/status.h"
+#include <memory>
+#include <string>
+
+using namespace omniruntime::writer;
+using namespace arrow;
+
+static constexpr int32_t DECIMAL_PRECISION_INDEX = 0;
+static constexpr int32_t DECIMAL_SCALE_INDEX = 1;
+
+// Global ParquetWriter pointer shared by the JNI entry points below.
+ParquetWriter *pWriter = nullptr;
+
+JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter
+    (JNIEnv* env, jobject jObj, jobject jsonObj)
+{
+    JNI_FUNC_START
+    // Get uriStr
+    jstring uri = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("uri"));
+    const char *uriStr = env->GetStringUTFChars(uri, JNI_FALSE);
+    std::string uriString(uriStr);
+    env->ReleaseStringUTFChars(uri, uriStr);
+
+    jstring ugiTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("ugi"));
+    const char *ugi = env->GetStringUTFChars(ugiTemp, JNI_FALSE);
+    std::string ugiString(ugi);
+    env->ReleaseStringUTFChars(ugiTemp, ugi);
+
+    jstring schemeTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("scheme"));
+    const char *scheme = env->GetStringUTFChars(schemeTemp, JNI_FALSE);
+    std::string schemeString(scheme);
+    env->ReleaseStringUTFChars(schemeTemp, scheme);
+
+    jstring hostTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("host"));
+    const char *host = env->GetStringUTFChars(hostTemp, JNI_FALSE);
+    std::string hostString(host);
+    env->ReleaseStringUTFChars(hostTemp, host);
+
+    jstring pathTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("path"));
+    const char *path = env->GetStringUTFChars(pathTemp, JNI_FALSE);
+    std::string pathString(path);
+    env->ReleaseStringUTFChars(pathTemp, path);
+
+    jint port = (jint)env->CallIntMethod(jsonObj, jsonMethodInt, env->NewStringUTF("port"));
+
+    UriInfo uriInfo(uriString, schemeString, pathString, hostString, std::to_string(port));
+
+    auto state = pWriter->InitRecordWriter(uriInfo, ugiString);
+    if (state != arrow::Status::OK()) {
+        env->ThrowNew(runtimeExceptionClass, state.ToString().c_str());
+        return 0;
+    }
+    return (jlong)(pWriter);
+    JNI_FUNC_END(runtimeExceptionClass)
+}
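+
+// Note on call order: the Spark side (OmniParquetOutputWriter.initialize) calls
+// initializeSchema first, which allocates the global pWriter, and only then
+// initializeWriter, which opens the output stream against that instance.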
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema
+    (JNIEnv* env, jobject obj, jobject job, long writer, jobjectArray fieldNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam)
+{
+    if (pWriter == nullptr) {
+        pWriter = new ParquetWriter();
+    }
+    JNI_FUNC_START
+    auto fieldTypesPtr = env->GetIntArrayElements(fieldTypes, JNI_FALSE);
+    auto nullablesPtr = env->GetBooleanArrayElements(nullables, JNI_FALSE);
+    if (fieldTypesPtr == NULL) {
+        throw std::runtime_error("Parquet type ids should not be null");
+    }
+    auto schemeLength = (int32_t)env->GetArrayLength(fieldTypes);
+
+    FieldVector fieldVector;
+    for (int i = 0; i < schemeLength; i++) {
+        jint parquetType = fieldTypesPtr[i];
+        jboolean nullable = nullablesPtr[i];
+        jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames, i);
+        const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr);
+        std::shared_ptr<DataType> writeParquetType;
+        if (static_cast<Type::type>(parquetType) == Type::DECIMAL) {
+            auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i);
+            auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray, JNI_FALSE);
+            auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX];
+            pWriter->precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX];
+            auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];
+            pWriter->scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];;
+            writeParquetType = decimal128(precision, scale);
+        } else {
+            switch (static_cast<Type::type>(parquetType)) {
+                case Type::type::BOOL:
+                    writeParquetType = arrow::boolean();
+                    break;
+                case Type::type::INT16:
+                    writeParquetType = arrow::int16();
+                    break;
+                case Type::type::INT32:
+                    writeParquetType = arrow::int32();
+                    break;
+                case Type::type::INT64:
+                    writeParquetType = arrow::int64();
+                    break;
+                case Type::type::DATE32:
+                    writeParquetType = arrow::date32();
+                    break;
+                case Type::type::DATE64:
+                    writeParquetType = arrow::date64();
+                    break;
+                case Type::type::DOUBLE:
+                    writeParquetType = arrow::float64();
+                    break;
+                case Type::type::STRING:
+                    writeParquetType = arrow::utf8();
+                    break;
+                default:
+                    throw std::invalid_argument("Unsupported parquet type");
+            }
+        }
+        auto t = field(cFieldName, writeParquetType, nullable);
+        fieldVector.emplace_back(t);
+        env->ReleaseStringUTFChars(fieldName, cFieldName);
+    }
+    pWriter->schema_ = std::make_shared<Schema>(fieldVector);
+    env->ReleaseIntArrayElements(fieldTypes, fieldTypesPtr, 0);
+    env->ReleaseBooleanArrayElements(nullables, nullablesPtr, 0);
+
+    JNI_FUNC_END_VOID(runtimeExceptionClass)
+}
+
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_write(
+    JNIEnv* env, jobject jObj, jlong writer, jlongArray vecNativeId,
+    jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows)
+{
+    JNI_FUNC_START
+    ParquetWriter *pWriter = (ParquetWriter *)writer;
+    auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE);
+    auto colNums = env->GetArrayLength(vecNativeId);
+    auto omniTypesPtr = env->GetIntArrayElements(omniTypes, JNI_FALSE);
+    auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE);
+    pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr);
+
+    env->ReleaseLongArrayElements(vecNativeId, vecNativeIdPtr, 0);
+    env->ReleaseIntArrayElements(omniTypes, omniTypesPtr, 0);
+    env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0);
+    JNI_FUNC_END_VOID(runtimeExceptionClass)
+}
+
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_splitWrite(
+    JNIEnv* env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes,
+    jbooleanArray dataColumnsIds, jlong startPos, jlong endPos)
+{
+    JNI_FUNC_START
+    auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE);
+    auto colNums = env->GetArrayLength(vecNativeId);
+    auto omniTypesPtr = env->GetIntArrayElements(omniTypes, JNI_FALSE);
+    auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE);
+    ParquetWriter *pWriter = (ParquetWriter *)writer;
+    pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr, true, startPos, endPos);
+
+    env->ReleaseLongArrayElements(vecNativeId, vecNativeIdPtr, 0);
+    env->ReleaseIntArrayElements(omniTypes, omniTypesPtr, 0);
+    env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0);
+    JNI_FUNC_END_VOID(runtimeExceptionClass)
+}
diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
new file mode 100644
index 000000000..cca9b9b9f
--- /dev/null
+++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OMNI_RUNTIME_PARQUETCOLUMNARBATCHJNIWRITER_H
+#define OMNI_RUNTIME_PARQUETCOLUMNARBATCHJNIWRITER_H
+
+#include <jni.h>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "common/debug.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter
+    (JNIEnv* env, jobject jObj, jobject job);
+
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema
+    (JNIEnv* env, jobject jObj, long writer, jobjectArray fieldNames, jintArray fieldTypes,
+    jbooleanArray nullables, jobjectArray decimalParam);
+
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_write(
+    JNIEnv* env, jobject jObj, jlong writer, jlongArray vecNativeId,
+    jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows);
+
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_splitWrite(
+    JNIEnv* env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes,
+    jbooleanArray dataColumnsIds, jlong startPos, jlong endPos);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/omnioperator/omniop-native-reader/java/pom.xml b/omnioperator/omniop-native-reader/java/pom.xml
index 8f6369aaf..1d5e336ad 100644
--- a/omnioperator/omniop-native-reader/java/pom.xml
+++ b/omnioperator/omniop-native-reader/java/pom.xml
@@ -88,6 +88,7 @@
                         <executable>bash</executable>
                         <arguments>
                             <argument>${cpp.dir}/build.sh</argument>
+                            <argument>debug</argument>
                             <argument>${plugin.cpp.test}</argument>
                         </arguments>
diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
new file mode 100644
index 000000000..c253d6963
--- /dev/null
+++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.huawei.boostkit.spark.jni; + +import com.huawei.boostkit.scan.jni.ParquetColumnarBatchJniReader; +import com.huawei.boostkit.write.jni.OrcColumnarBatchJniWriter; +import com.huawei.boostkit.write.jni.ParquetColumnarBatchJniWriter; + +import nova.hetu.omniruntime.vector.IntVec; +import nova.hetu.omniruntime.vector.*; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.orc.OrcFile; +import org.apache.spark.sql.catalyst.util.RebaseDateTime; +import org.apache.spark.sql.execution.vectorized.OmniColumnVector; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.CharType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.ShortType; +import org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.types.VarcharType; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.json.JSONObject; +import org.apache.spark.sql.catalyst.util.RebaseDateTime; + +import java.io.IOException; +import java.net.URI; + +public class ParquetColumnarBatchWriter { + public ParquetColumnarBatchWriter() { + jniWriter = new ParquetColumnarBatchJniWriter(); + } + + public enum ParquetLibTypeKind { + NA, + /// Boolean as 1 bit, LSB bit-packed ordering + BOOL, + + /// Unsigned 8-bit little-endian integer + UINT8, + + /// Signed 8-bit little-endian integer + INT8, + + /// Unsigned 16-bit little-endian integer + UINT16, + + /// Signed 16-bit little-endian integer + INT16, + + /// Unsigned 32-bit little-endian integer + UINT32, + + /// Signed 32-bit little-endian integer + INT32, + + /// Unsigned 64-bit little-endian integer + UINT64, + + /// Signed 64-bit little-endian integer + INT64, + + /// 2-byte floating point value + HALF_FLOAT, + + /// 4-byte floating point value + FLOAT, + + /// 8-byte floating point value + DOUBLE, + + /// UTF8 variable-length string as List + STRING, + + /// Variable-length bytes (no guarantee of UTF8-ness) + BINARY, + + /// Fixed-size binary. Each value occupies the same number of bytes + FIXED_SIZE_BINARY, + + /// int32_t days since the UNIX epoch + DATE32, + + /// int64_t milliseconds since the UNIX epoch + DATE64, + + /// Exact timestamp encoded with int64 since UNIX epoch + /// Default unit millisecond + TIMESTAMP, + + /// Time as signed 32-bit integer, representing either seconds or + /// milliseconds since midnight + TIME32, + + /// Time as signed 64-bit integer, representing either microseconds or + /// nanoseconds since midnight + TIME64, + + /// YEAR_MONTH interval in SQL style + INTERVAL_MONTHS, + + /// DAY_TIME interval in SQL style + INTERVAL_DAY_TIME, + + /// Precision- and scale-based decimal type with 128 bits. + DECIMAL128, + + /// Defined for backward-compatibility. 
+        // DECIMAL = DECIMAL128,
+
+        /// Precision- and scale-based decimal type with 256 bits.
+        DECIMAL256,
+
+        /// A list of some logical data type
+        LIST,
+
+        /// Struct of logical types
+        STRUCT,
+
+        /// Sparse unions of logical types
+        SPARSE_UNION,
+
+        /// Dense unions of logical types
+        DENSE_UNION,
+
+        /// Dictionary-encoded type, also called "categorical" or "factor"
+        /// in other programming languages. Holds the dictionary value
+        /// type but not the dictionary itself, which is part of the
+        /// ArrayData struct
+        DICTIONARY,
+
+        /// Map, a repeated struct logical type
+        MAP,
+
+        /// Custom data type, implemented by user
+        EXTENSION,
+
+        /// Fixed size list of some logical type
+        FIXED_SIZE_LIST,
+
+        /// Measure of elapsed time in either seconds, milliseconds, microseconds
+        /// or nanoseconds.
+        DURATION,
+
+        /// Like STRING, but with 64-bit offsets
+        LARGE_STRING,
+
+        /// Like BINARY, but with 64-bit offsets
+        LARGE_BINARY,
+
+        /// Like LIST, but with 64-bit offsets
+        LARGE_LIST,
+
+        /// Calendar interval type with three fields.
+        INTERVAL_MONTH_DAY_NANO,
+
+        // Leave this at the end
+        MAX_ID
+    }
+
+    public void initializeWriterJava(Path path) throws IOException {
+        JSONObject writerOptionsJson = new JSONObject();
+        String ugi = UserGroupInformation.getCurrentUser().toString();
+
+        URI uri = path.toUri();
+
+        writerOptionsJson.put("uri", path.toString());
+        writerOptionsJson.put("ugi", ugi);
+
+        writerOptionsJson.put("host", uri.getHost() == null ? "" : uri.getHost());
+        writerOptionsJson.put("scheme", uri.getScheme() == null ? "" : uri.getScheme());
+        writerOptionsJson.put("port", uri.getPort());
+        writerOptionsJson.put("path", uri.getPath() == null ? "" : uri.getPath());
+
+        writer = jniWriter.initializeWriter(writerOptionsJson);
+    }
+
+    public void convertGregorianToJulian(IntVec intVec, int startPos, int endPos) {
+        int julianValue;
+        for (int rowIndex = startPos; rowIndex < endPos; rowIndex++) {
+            julianValue = RebaseDateTime.rebaseGregorianToJulianDays(intVec.get(rowIndex));
+            intVec.set(rowIndex, julianValue);
+        }
+    }
+
+    public void initializeSchemaJava(StructType dataSchema) {
+        int schemaLength = dataSchema.length();
+        String[] fieldNames = new String[schemaLength];
+        int[] fieldTypes = new int[schemaLength];
+        boolean[] nullables = new boolean[schemaLength];
+        for (int i = 0; i < schemaLength; i++) {
+            StructField field = dataSchema.fields()[i];
+            fieldNames[i] = field.name();
+            fieldTypes[i] = sparkTypeToParquetLibType(field.dataType());
+            nullables[i] = field.nullable();
+        }
+        jniWriter.initializeSchema(writer, fieldNames, fieldTypes, nullables, extractDecimalParam(dataSchema));
+    }
+
+    public int sparkTypeToParquetLibType(DataType dataType) {
+        if (dataType instanceof BooleanType) {
+            return ParquetLibTypeKind.BOOL.ordinal();
+        } else if (dataType instanceof ShortType) {
+            return ParquetLibTypeKind.INT16.ordinal();
+        } else if (dataType instanceof IntegerType) {
+            IntegerType integerType = (IntegerType) dataType;
+            switch (integerType.defaultSize()) {
+                case 1:
+                    return ParquetLibTypeKind.INT8.ordinal();
+                case 2:
+                    return ParquetLibTypeKind.INT16.ordinal();
+                case 4:
+                    return ParquetLibTypeKind.INT32.ordinal();
+                case 8:
+                    return ParquetLibTypeKind.INT64.ordinal();
+                default:
+                    throw new RuntimeException(
+                        "Unsupported size " + integerType.defaultSize() + " of integer type");
+            }
+        } else if (dataType instanceof LongType) {
+            return ParquetLibTypeKind.INT64.ordinal();
+        } else if (dataType instanceof DateType) {
+            DateType dateType = (DateType) dataType;
+            switch (dateType.defaultSize()) {
+                case 4:
+                    return ParquetLibTypeKind.DATE32.ordinal();
+                case 8:
+                    return ParquetLibTypeKind.DATE64.ordinal();
+                default:
+                    throw new RuntimeException(
+                        "Unsupported size " + dateType.defaultSize() + " of date type");
+            }
+        } else if (dataType instanceof DoubleType) {
+            return ParquetLibTypeKind.DOUBLE.ordinal();
+        } else if (dataType instanceof VarcharType) {
+            return ParquetLibTypeKind.STRING.ordinal();
+        } else if (dataType instanceof StringType) {
+            return ParquetLibTypeKind.STRING.ordinal();
+        } else if (dataType instanceof CharType) {
+            return ParquetLibTypeKind.STRING.ordinal();
+        } else if (dataType instanceof DecimalType) {
+            DecimalType decimalType = (DecimalType) dataType;
+            // Arrow/Parquet has no Decimal64, so both 8- and 16-byte decimals map to DECIMAL128.
+            switch (decimalType.defaultSize()) {
+                case 8:
+                    return ParquetLibTypeKind.DECIMAL128.ordinal();
+                case 16:
+                    return ParquetLibTypeKind.DECIMAL128.ordinal();
+                default:
+                    throw new RuntimeException(
+                        "Unsupported size " + decimalType.defaultSize() + " of decimal type");
+            }
+        } else {
+            throw new RuntimeException(
+                "Unsupported type convert spark type " + dataType.simpleString() + " to parquet lib type");
+        }
+    }
+
+    public int[][] extractDecimalParam(StructType dataSchema) {
+        int paramNum = 2;
+        int precisionIndex = 0;
+        int scaleIndex = 1;
+        int[][] decimalParams = new int[dataSchema.length()][paramNum];
+        for (int i = 0; i < dataSchema.length(); i++) {
+            DataType dataType = dataSchema.fields()[i].dataType();
+            if (dataType instanceof DecimalType) {
+                DecimalType decimal = (DecimalType) dataType;
+                decimalParams[i][precisionIndex] = decimal.precision();
+                decimalParams[i][scaleIndex] = decimal.scale();
+            }
+        }
+        return decimalParams;
+    }
+
+    public void write(int[] omniTypes, boolean[] dataColumnsIds, ColumnarBatch batch) {
+        long[] vecNativeIds = new long[batch.numCols()];
+        for (int i = 0; i < batch.numCols(); i++) {
+            OmniColumnVector omniVec = (OmniColumnVector) batch.column(i);
+            Vec vec = omniVec.getVec();
+            vecNativeIds[i] = vec.getNativeVector();
+            // Omni type id 8 marks the date type held in an IntVec (date32).
+            boolean isDateType = (omniTypes[i] == 8);
+            if (isDateType) {
+                convertGregorianToJulian((IntVec) vec, 0, batch.numRows());
+            }
+        }
+
+        jniWriter.write(writer, vecNativeIds, omniTypes, dataColumnsIds, batch.numRows());
+    }
+
+    public void splitWrite(int[] omniTypes, int[] allOmniTypes, boolean[] dataColumnsIds,
+        ColumnarBatch inputBatch, long startPos, long endPos) {
+        long[] vecNativeIds = new long[inputBatch.numCols()];
+        for (int i = 0; i < inputBatch.numCols(); i++) {
+            OmniColumnVector omniVec = (OmniColumnVector) inputBatch.column(i);
+            Vec vec = omniVec.getVec();
+            vecNativeIds[i] = vec.getNativeVector();
+            boolean isDateType = (allOmniTypes[i] == 8);
+            if (isDateType) {
+                convertGregorianToJulian((IntVec) vec, (int) startPos, (int) endPos);
+            }
+        }
+
+        jniWriter.splitWrite(writer, vecNativeIds, omniTypes, dataColumnsIds, startPos, endPos);
+    }
+
+    public long writer;
+
+    public long schema;
+
+    public ParquetColumnarBatchJniWriter jniWriter;
+}
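The wrapper above is driven in three steps: register the schema, open the file, then push batches. A minimal usage sketch (dataSchema, omniTypes, dataColumnsIds and batch are assumed to be in scope; the HDFS path is illustrative only):

    ParquetColumnarBatchWriter writer = new ParquetColumnarBatchWriter();
    // 1. Map the Spark schema to parquet lib type ids and register it natively.
    writer.initializeSchemaJava(dataSchema);
    // 2. Open the output stream; the URI is decomposed into scheme/host/port/path.
    writer.initializeWriterJava(new Path("hdfs://example-host:9000/warehouse/t/part-00000.parquet"));
    // 3. Write whole batches, or [startPos, endPos) slices via splitWrite.
    writer.write(omniTypes, dataColumnsIds, batch);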
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ColumnarPlugin.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ColumnarPlugin.scala
index d19d1a467..535c44655 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ColumnarPlugin.scala
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/com/huawei/boostkit/spark/ColumnarPlugin.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.LeftSemi
 import org.apache.spark.sql.catalyst.plans.logical.Aggregate
 import org.apache.spark.sql.execution.command.{DataWritingCommand, DataWritingCommandExec}
 import org.apache.spark.sql.execution.datasources.orc.{OmniOrcFileFormat, OrcFileFormat}
+import org.apache.spark.sql.execution.datasources.parquet.{OmniParquetFileFormat, ParquetFileFormat}
 import org.apache.spark.sql.execution.datasources.{FileFormat, InsertIntoHadoopFsRelationCommand, OmniInsertIntoHadoopFsRelationCommand}
 import org.apache.spark.sql.execution.util.SparkMemoryUtils.addLeakSafeTaskCompletionListener
 import org.apache.spark.sql.execution.aggregate.PushOrderedLimitThroughAgg
@@ -567,6 +568,7 @@ case class ColumnarPreOverrides(isSupportAdaptive: Boolean = true)
       logInfo(s"Columnar Processing for ${cmd.getClass} is currently supported.")
       val fileFormat: FileFormat = cmd.fileFormat match {
         case _: OrcFileFormat => new OmniOrcFileFormat()
+        case _: ParquetFileFormat => new OmniParquetFileFormat()
         case format => logInfo(s"Unsupported ${format.getClass} file " +
           s"format for columnar data write command.")
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala
index f3fe865e0..78fdb1aab 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils
 import org.apache.spark.sql.catalyst.expressions.{Cast, Concat, Expression, Literal, ScalaUDF, UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.connector.write.DataWriter
 import org.apache.spark.sql.execution.datasources.orc.OmniOrcOutputWriter
+import org.apache.spark.sql.execution.datasources.parquet.OmniParquetOutputWriter
 import org.apache.spark.sql.execution.metric.{CustomMetrics, SQLMetric}
 import org.apache.spark.sql.types.StringType
 import org.apache.spark.sql.vectorized.ColumnarBatch
@@ -62,6 +63,9 @@ class OmniSingleDirectoryDataWriter(
       context = taskAttemptContext)

     currentWriter match {
+      case _: OmniParquetOutputWriter =>
+        currentWriter.asInstanceOf[OmniParquetOutputWriter]
+          .initialize(description.allColumns, description.dataColumns)
       case _: OmniOrcOutputWriter =>
         currentWriter.asInstanceOf[OmniOrcOutputWriter]
           .initialize(description.allColumns, description.dataColumns)
@@ -235,8 +239,18 @@ abstract class OmniBaseDynamicPartitionDataWriter(
       path = currentPath,
       dataSchema = description.dataColumns.toStructType,
       context = taskAttemptContext)
-    currentWriter.asInstanceOf[OmniOrcOutputWriter]
-      .initialize(description.allColumns, description.dataColumns)
+
+    currentWriter match {
+      case _: OmniParquetOutputWriter =>
+        currentWriter.asInstanceOf[OmniParquetOutputWriter]
+          .initialize(description.allColumns, description.dataColumns)
+      case _: OmniOrcOutputWriter =>
+        currentWriter.asInstanceOf[OmniOrcOutputWriter]
+          .initialize(description.allColumns, description.dataColumns)
+      case _ =>
+        throw new UnsupportedOperationException(
+          s"Unsupported ${currentWriter.getClass} Output writer!")
+    }

     statsTrackers.foreach(_.newFile(currentPath))
   }
@@ -266,8 +280,17 @@ abstract class OmniBaseDynamicPartitionDataWriter(
   protected def writeRecord(record: InternalRow, startPos: Long, endPos: Long): Unit = {
     // TODO After add OmniParquetOutPutWriter need extract
     //  a abstract interface named OmniOutPutWriter
-    assert(currentWriter.isInstanceOf[OmniOrcOutputWriter])
-    currentWriter.asInstanceOf[OmniOrcOutputWriter].spiltWrite(record, startPos, endPos)
+    currentWriter match {
+      case _: OmniParquetOutputWriter =>
+        currentWriter.asInstanceOf[OmniParquetOutputWriter]
+          .spiltWrite(record, startPos, endPos)
+      case _: OmniOrcOutputWriter =>
+        currentWriter.asInstanceOf[OmniOrcOutputWriter]
+          .spiltWrite(record, startPos, endPos)
+      case _ =>
+        throw new UnsupportedOperationException(
+          s"Unsupported ${currentWriter.getClass} Output writer!")
+    }

     statsTrackers.foreach(_.newRow(currentWriter.path, record))
     recordsInFile += record.asInstanceOf[OmniInternalRow].batch.numRows()
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetOutputWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetOutputWriter.scala
new file mode 100644
index 000000000..6d8653d52
--- /dev/null
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetOutputWriter.scala
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import com.huawei.boostkit.spark.expression.OmniExpressionAdaptor.sparkTypeToOmniType
+import com.huawei.boostkit.spark.jni.ParquetColumnarBatchWriter
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapreduce._
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.execution.datasources.{OmniInternalRow, OutputWriter}
+import org.apache.spark.sql.types.StructType
+
+// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
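+// initialize() must be called with the query's output and data columns before
+// write()/spiltWrite(); it derives the native schema, the omni type ids and the
+// data-column mask that the native writer consumes.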
+class OmniParquetOutputWriter(path: String, dataSchema: StructType,
+    context: TaskAttemptContext)
+  extends OutputWriter {
+
+  val writer = new ParquetColumnarBatchWriter()
+  var omniTypes: Array[Int] = new Array[Int](0)
+  var dataColumnsIds: Array[Boolean] = new Array[Boolean](0)
+  var allOmniTypes: Array[Int] = new Array[Int](0)
+
+  def initialize(allColumns: Seq[Attribute], dataColumns: Seq[Attribute]): Unit = {
+    val filePath = new Path(path)
+    writer.initializeSchemaJava(dataSchema)
+    writer.initializeWriterJava(filePath)
+    dataSchema.foreach(field => {
+      omniTypes = omniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata).getId.ordinal()
+    })
+
+    allColumns.toStructType.foreach(field => {
+      allOmniTypes = allOmniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata)
+        .getId.ordinal()
+    })
+    dataColumnsIds = allColumns.map(x => dataColumns.contains(x)).toArray
+  }
+
+  override def write(row: InternalRow): Unit = {
+    assert(row.isInstanceOf[OmniInternalRow])
+    writer.write(omniTypes, dataColumnsIds, row.asInstanceOf[OmniInternalRow].batch)
+  }
+
+  def spiltWrite(row: InternalRow, startPos: Long, endPos: Long): Unit = {
+    assert(row.isInstanceOf[OmniInternalRow])
+    writer.splitWrite(omniTypes, allOmniTypes, dataColumnsIds,
+      row.asInstanceOf[OmniInternalRow].batch, startPos, endPos)
+  }
+
+  override def close(): Unit = {
+  }
+
+  override def path(): String = {
+    path
+  }
+}
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala
index 78acf3058..8add989e7 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala
@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+import org.apache.parquet.hadoop.util.ContextUtil
 import org.apache.spark.TaskContext
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql._
@@ -33,7 +34,12 @@ import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
 import org.apache.spark.util.SerializableConfiguration
 import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS
+
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.parquet.hadoop.{ParquetOutputCommitter, ParquetOutputFormat}
+import org.apache.parquet.hadoop.codec.CodecConfig
+import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel
+import org.apache.spark.sql.internal.SQLConf

 import java.net.URI

@@ -48,11 +54,92 @@ class OmniParquetFileFormat extends FileFormat with DataSourceRegister with Logg
   override def equals(other: Any): Boolean = other.isInstanceOf[OmniParquetFileFormat]

   override def prepareWrite(
-    sparkSession: SparkSession,
-    job: Job,
-    options: Map[String, String],
-    dataSchema: StructType): OutputWriterFactory = {
-    throw new UnsupportedOperationException()
+      sparkSession: SparkSession,
+      job: Job,
+      options: Map[String, String],
+      dataSchema: StructType): OutputWriterFactory = {
+    val parquetOptions = new
ParquetOptions(options, sparkSession.sessionState.conf) + + val conf = ContextUtil.getConfiguration(job) + + val committerClass = + conf.getClass( + SQLConf.PARQUET_OUTPUT_COMMITTER_CLASS.key, + classOf[ParquetOutputCommitter], + classOf[OutputCommitter]) + + if (conf.get(SQLConf.PARQUET_OUTPUT_COMMITTER_CLASS.key) == null) { + logInfo("Using default output committer for Parquet: " + + classOf[ParquetOutputCommitter].getCanonicalName) + } else { + logInfo("Using user defined output committer for Parquet: " + committerClass.getCanonicalName) + } + + conf.setClass( + SQLConf.OUTPUT_COMMITTER_CLASS.key, + committerClass, + classOf[OutputCommitter]) + + // We're not really using `ParquetOutputFormat[Row]` for writing data here, because we override + // it in `ParquetOutputWriter` to support appending and dynamic partitioning. The reason why + // we set it here is to setup the output committer class to `ParquetOutputCommitter`, which is + // bundled with `ParquetOutputFormat[Row]`. + job.setOutputFormatClass(classOf[ParquetOutputFormat[Row]]) + + ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetWriteSupport]) + + // This metadata is useful for keeping UDTs like Vector/Matrix. + ParquetWriteSupport.setSchema(dataSchema, conf) + + // Sets flags for `ParquetWriteSupport`, which converts Catalyst schema to Parquet + // schema and writes actual rows to Parquet files. + conf.set( + SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key, + sparkSession.sessionState.conf.writeLegacyParquetFormat.toString) + + conf.set( + SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, + sparkSession.sessionState.conf.parquetOutputTimestampType.toString) + + conf.set( + SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED.key, + sparkSession.sessionState.conf.parquetFieldIdWriteEnabled.toString) + + // Sets compression scheme + conf.set(ParquetOutputFormat.COMPRESSION, parquetOptions.compressionCodecClassName) + + // SPARK-15719: Disables writing Parquet summary files by default. + if (conf.get(ParquetOutputFormat.JOB_SUMMARY_LEVEL) == null + && conf.get(ParquetOutputFormat.ENABLE_JOB_SUMMARY) == null) { + conf.setEnum(ParquetOutputFormat.JOB_SUMMARY_LEVEL, JobSummaryLevel.NONE) + } + + if (ParquetOutputFormat.getJobSummaryLevel(conf) != JobSummaryLevel.NONE + && !classOf[ParquetOutputCommitter].isAssignableFrom(committerClass)) { + // output summary is requested, but the class is not a Parquet Committer + logWarning(s"Committer $committerClass is not a ParquetOutputCommitter and cannot" + + s" create job summaries. " + + s"Set Parquet option ${ParquetOutputFormat.JOB_SUMMARY_LEVEL} to NONE.") + } + + new OutputWriterFactory { + // This OutputWriterFactory instance is deserialized when writing Parquet files on the + // executor side without constructing or deserializing ParquetFileFormat. Therefore, we hold + // another reference to ParquetLogRedirector.INSTANCE here to ensure the latter class is + // initialized. 
+      private val parquetLogRedirector = ParquetLogRedirector.INSTANCE
+
+      override def newInstance(
+          path: String,
+          dataSchema: StructType,
+          context: TaskAttemptContext): OutputWriter = {
+        new OmniParquetOutputWriter(path, dataSchema, context)
+      }
+
+      override def getFileExtension(context: TaskAttemptContext): String = {
+        CodecConfig.from(context).getCodec.getExtension + ".parquet"
+      }
+    }
   }

   override def inferSchema(
--
Gitee


From 87cf16eeeaa6276e73881c089ce4a9c1579bee0e Mon Sep 17 00:00:00 2001
From: caojiazhi <352139040@qq.com>
Date: Tue, 24 Dec 2024 17:47:18 +0800
Subject: [PATCH 2/6] Support parquet table write parquetWriter

---
 .../cpp/src/parquet/ParquetWriter.h   |  49 ++
 .../cpp/src/parquet/ParquetWriter.cpp | 706 ++++++++++++++++++
 2 files changed, 755 insertions(+)
 create mode 100644 omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
 create mode 100644 omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp

diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
new file mode 100644
index 000000000..33fac3e78
--- /dev/null
+++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NATIVE_READER_PARQUETWRITER_H
+#define NATIVE_READER_PARQUETWRITER_H
+
+#include <memory>
+#include <string>
+#include "common/UriInfo.h"
+#include "parquet/arrow/writer.h"
+
+using namespace arrow::internal;
+
+namespace omniruntime::writer {
+class ParquetWriter {
+public:
+    ParquetWriter() {}
+
+    arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi);
+    std::shared_ptr<arrow::Field> BuildField(const std::string &name, int typeId, bool nullable);
+    void write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds,
+        bool isSplitWrite = false, long startPos = 0, long endPos = 0);
+
+public:
+    std::unique_ptr<parquet::arrow::FileWriter> arrow_writer;
+    std::shared_ptr<arrow::Schema> schema_;
+    int precision;
+    int scale;
+};
+}
+#endif // NATIVE_READER_PARQUETWRITER_H
\ No newline at end of file
diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp
new file mode 100644
index 000000000..e2b8cfa4c
--- /dev/null
+++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp
@@ -0,0 +1,706 @@
+/**
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ParquetWriter.h"
+#include "ParquetReader.h"
+#include "arrow/array/array_base.h"
+#include "arrow/array/array_binary.h"
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/data.h"
+#include "arrow/util/bitmap.h"
+#include "arrow/chunked_array.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/table.h"
+#include "arrowadapter/FileSystemAdapter.h"
+#include "common/UriInfo.h"
+#include "jni/jni_common.h"
+#include "parquet/arrow/reader.h"
+#include "parquet/exception.h"
+#include "parquet/properties.h"
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+using namespace arrow;
+using namespace arrow::internal;
+using namespace parquet::arrow;
+using namespace omniruntime::writer;
+using namespace omniruntime::reader;
+
+static std::mutex mutex_;
+
+namespace omniruntime::writer {
+
+// std::string GetReaderAddr(const std::string address) {
+//     std::string prefix =
+//         "hdfs://OmniOperator:9000/user/hive/warehouse/"
+//         "tpcds_bin_partitioned_varchar_orc_2.db/test_parquet_int";
+//     auto pos = address.find_last_of('/');
+//     std::string suffix = address.substr(pos);
+//     return prefix + suffix;
+// }
+
+arrow::Status ParquetWriter::InitRecordWriter(UriInfo &uri, std::string &ugi)
+{
+    // Configure writer settings
+    parquet::WriterProperties::Builder writer_properties;
+
+    // Configure Arrow-specific writer settings
+    parquet::ArrowWriterProperties::Builder arrow_writer_properties;
+
+    std::shared_ptr<arrow::io::OutputStream> outputStream;
+
+    // Get the file from the filesystem
+    arrow::Status result;
+    mutex_.lock();
+    Filesystem *fs = GetFileSystemPtr(uri, ugi, result);
+    mutex_.unlock();
+    if (fs == nullptr || fs->filesys_ptr == nullptr) {
+        return arrow::Status::IOError(result.ToString());
+    }
+
+    std::string path = uri.ToString();
+    ARROW_ASSIGN_OR_RAISE(outputStream, fs->filesys_ptr->OpenOutputStream(path));
+
+    writer_properties.disable_dictionary();
+
+    // Temporarily use the default values of WriterProperties and ArrowWriterProperties.
+    auto fileWriter = FileWriter::Open(
+        *schema_, arrow::default_memory_pool(), outputStream,
+        writer_properties.build(), parquet::default_arrow_writer_properties());
+
+    ARROW_ASSIGN_OR_RAISE(arrow_writer, std::move(fileWriter));
+
+    // ARROW_RETURN_NOT_OK(fileWriter.ValueOrDie()->Close());
+
+    return arrow::Status::OK();
+}
+
+// std::shared_ptr<Field>
+// ParquetWriter::BuildField(const std::string &name, int typeId, bool nullable) {
+//     switch (typeId) {
+//         case Type::BOOL:
+//             return std::make_shared<Field>(name, std::make_shared<BooleanType>(), nullable);
+//         case Type::INT16:
+//             return std::make_shared<Field>(name, std::make_shared<Int16Type>(), nullable);
+//         case Type::INT32:
+//             return std::make_shared<Field>(name, std::make_shared<Int32Type>(), nullable);
+//         case Type::INT64:
+//             return std::make_shared<Field>(name, std::make_shared<Int64Type>(), nullable);
+//         case Type::DATE32:
+//             return std::make_shared<Field>(name, std::make_shared<Date32Type>(), nullable);
+//         case Type::DATE64:
+//             return std::make_shared<Field>(name, std::make_shared<Date64Type>(), nullable);
+//         case Type::DOUBLE:
+//             return std::make_shared<Field>(name, std::make_shared<DoubleType>(), nullable);
+//         case Type::STRING:
+//             return std::make_shared<Field>(name, std::make_shared<StringType>(), nullable);
+//         case Type::DECIMAL128:
+//             return std::make_shared<Field>(name, std::make_shared<Decimal128Type>(38, 2), nullable);
+//         default:
+//             throw parquet::ParquetException("Unsupported parquet type", typeId);
+//     }
+// }
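+
+// Each build*Chunk helper below follows the same pattern: copy the Omni vector
+// slice [startPos, endPos) into a local buffer, build an Arrow validity bitmap
+// and a data buffer, wrap both in ArrayData and return a one-chunk ChunkedArray.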
+std::shared_ptr<arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_BOOLEAN>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    bool values[vectorSize];
+    long index = 0;
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    // todo: why calling vector->HasNull before SetBitsTo makes other columns abnormal
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<bool> builder;
+    builder.Resize(vectorSize);
+    builder.Append(reinterpret_cast<const uint8_t *>(values), vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto booleanType = std::make_shared<arrow::BooleanType>();
+    auto arrayData = arrow::ArrayData::Make(booleanType, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto booleanArray = std::make_shared<arrow::BooleanArray>(arrayData);
+    arrayVector.emplace_back(booleanArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie();
+}
+
+std::shared_ptr<arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_SHORT>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    short values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<int16_t> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto int16Type = std::make_shared<arrow::Int16Type>();
+    auto arrayData = arrow::ArrayData::Make(int16Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto numericArray = std::make_shared<NumericArray<arrow::Int16Type>>(arrayData);
+    arrayVector.emplace_back(numericArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, int16Type).ValueOrDie();
+}
+std::shared_ptr<arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_INT>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    int values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<int32_t> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto int32Type = std::make_shared<arrow::Int32Type>();
+    auto arrayData = arrow::ArrayData::Make(int32Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto numericArray = std::make_shared<NumericArray<arrow::Int32Type>>(arrayData);
+    arrayVector.emplace_back(numericArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, int32Type).ValueOrDie();
+}
+
+std::shared_ptr<arrow::ChunkedArray> buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_LONG>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    int64_t values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    // todo: why calling vector->HasNull before SetBitsTo makes other columns abnormal
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<int64_t> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto int64Type = std::make_shared<arrow::Int64Type>();
+    auto arrayData = arrow::ArrayData::Make(int64Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto numericArray = std::make_shared<NumericArray<arrow::Int64Type>>(arrayData);
+    arrayVector.emplace_back(numericArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, int64Type).ValueOrDie();
+}
+std::shared_ptr<arrow::ChunkedArray> buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_DATE32>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    int values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<int32_t> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto date32Type = std::make_shared<arrow::Date32Type>();
+    auto arrayData = arrow::ArrayData::Make(date32Type, vectorSize, buffers);
+
+    // todo: Array or arrow::Array
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto date32Array = std::make_shared<NumericArray<arrow::Date32Type>>(arrayData);
+    arrayVector.emplace_back(date32Array);
+
+    return arrow::ChunkedArray::Make(arrayVector, date32Type).ValueOrDie();
+}
+
+std::shared_ptr<arrow::ChunkedArray> buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_DATE64>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    int64_t values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<int64_t> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto date64Type = std::make_shared<arrow::Date64Type>();
+    auto arrayData = arrow::ArrayData::Make(date64Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto date64Array = std::make_shared<NumericArray<arrow::Date64Type>>(arrayData);
+    arrayVector.emplace_back(date64Array);
+
+    return arrow::ChunkedArray::Make(arrayVector, date64Type).ValueOrDie();
+}
+std::shared_ptr<arrow::ChunkedArray> buildDoubleChunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_DOUBLE>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    double values[vectorSize];
+    long index = 0;
+
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    for (long j = startPos; j < endPos; j++) {
+        values[index] = vector->GetValue(j);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+    TypedBufferBuilder<double> builder;
+    builder.Resize(vectorSize);
+    builder.Append(values, vectorSize);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto doubleType = std::make_shared<arrow::DoubleType>();
+    auto arrayData = arrow::ArrayData::Make(doubleType, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto doubleArray = std::make_shared<NumericArray<arrow::DoubleType>>(arrayData);
+    arrayVector.emplace_back(doubleArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, doubleType).ValueOrDie();
+}
+
+std::shared_ptr<arrow::ChunkedArray> buildVarcharChunk(DataTypeId typeId, BaseVector *baseVector,
+    bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    auto vector = dynamic_cast<Vector<LargeStringContainer<std::string_view>> *>(baseVector);
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    TypedBufferBuilder<int32_t> offsetsBuilder;
+    TypedBufferBuilder<uint8_t> valuesBuilder;
+    int32_t currentOffset = 0;
+    offsetsBuilder.Append(0);
+    valuesBuilder.Resize(vectorSize);
+
+    long index = 0;
+    for (long j = startPos; j < endPos; j++) {
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+        // Append the string bytes directly; no intermediate copy is needed.
+        std::string strValue = std::string(vector->GetValue(j));
+        currentOffset += strValue.length();
+        offsetsBuilder.Append(currentOffset);
+        valuesBuilder.Append(reinterpret_cast<const uint8_t *>(strValue.data()), strValue.length());
+    }
+
+    auto offsetsBuffer = offsetsBuilder.Finish().ValueOrDie();
+    auto valuesBuffer = valuesBuilder.Finish().ValueOrDie();
+
+    std::vector<std::shared_ptr<Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(offsetsBuffer);
+    buffers.emplace_back(valuesBuffer);
+
+    auto utf8Type = std::make_shared<arrow::StringType>();
+    auto arrayData = arrow::ArrayData::Make(utf8Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+    auto stringArray = std::make_shared<arrow::StringArray>(arrayData);
+    arrayVector.emplace_back(stringArray);
+
+    return arrow::ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie();
+}
+
+// arrow parquet doesn't have a Decimal64 type, so Decimal128 is used instead of it
+std::shared_ptr<arrow::ChunkedArray> buildDecimal64Chunk(DataTypeId typeId, BaseVector *baseVector, int precision,
+    int scale, bool isSplitWrite = false, long startPos = 0, long endPos = 0)
+{
+    using T = typename NativeType<OMNI_DECIMAL64>::type;
+    auto vector = (Vector<T> *)baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+    bitmap.SetBitsTo(true);
+
+    BufferBuilder builder;
+    builder.Resize(vectorSize);
+    std::vector<BasicDecimal128> decimalArray;
+
+    long index = 0;
+    for (long j = startPos; j < endPos; j++) {
+        // The int64_t constructor sign-extends the value into the high bits.
+        BasicDecimal128 basicDecimal128(vector->GetValue(j));
+        decimalArray.emplace_back(basicDecimal128);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index, false);
+        }
+        index++;
+    }
+
+std::shared_ptr<ChunkedArray> buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector, int precision, int scale,
+                                                   bool isSplitWrite = false, long startPos = 0,
+                                                   long endPos = 0) {
+    using T = typename NativeType<OMNI_DECIMAL128>::type;
+    auto vector = (Vector<T> *) baseVector;
+
+    if (!isSplitWrite) {
+        startPos = 0;
+        endPos = vector->GetSize();
+    }
+
+    long vectorSize = endPos - startPos;
+    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
+    bitmap.SetBitsTo(true);
+
+    BufferBuilder builder;
+    builder.Resize(vectorSize);
+    std::vector<Decimal128> decimalArray;
+
+    long index=0;
+    for (long j =startPos; j< endPos; j++) {
+        auto decimalValue=vector->GetValue(j)
+        BasicDecimal128 basicDecimal128(vector->GetValue(j).HighBits(),vector->GetValue(j).LowBits());
+        decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
+
+        decimalArray.back().Rescale(2,2);
+        if (vector->IsNull(j)) {
+            bitmap.SetBitTo(index,false);
+        }
+        index++;
+    }
+
+    builder.Append(decimalArray.data(), decimalArray.size()*16);
+    auto maybe_buffer = builder.Finish();
+    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+
+    std::vector<std::shared_ptr<arrow::Buffer>> buffers;
+    buffers.emplace_back(bitmapBuffer);
+    buffers.emplace_back(dataBuffer);
+
+    auto decimal128Type = std::make_shared<Decimal128Type>(precision,scale);
+    auto arrayData = arrow::ArrayData::Make(Decimal128Type, vectorSize, buffers);
+
+    std::vector<std::shared_ptr<Array>> arrayVector;
+    auto decimal128Array = std::make_shared<Decimal128Array>(arrayData);
+    arrayVector.emplace_back(decimal128Array);
+
+    return arrow::ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie;
+    }
+
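[Editor's note] `decimalArray.back().Rescale(2,2)` in the loop above appears to discard its return value: Arrow decimal rescaling is not in-place, `arrow::Decimal128::Rescale` returns the rescaled value as an `arrow::Result`, and the hardcoded scales ignore the column's actual scale, so the call as written has no effect. The later patches shown in this series do not change it. Assuming a real rescale was intended, a checked variant might look like this (sketch; `fromScale`/`toScale` are placeholder parameters):

```cpp
#include <arrow/result.h>
#include <arrow/util/decimal.h>
#include <cstdint>

// Sketch only: returns the rescaled value, or an error on overflow or
// precision loss, instead of silently dropping the result.
arrow::Result<arrow::Decimal128> RescaleChecked(const arrow::Decimal128 &value,
                                                int32_t fromScale, int32_t toScale)
{
    ARROW_ASSIGN_OR_RAISE(arrow::Decimal128 rescaled, value.Rescale(fromScale, toScale));
    return rescaled;
}
```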
+
+void ParquetWrier::write(long * vecNativeId, Int colNums,
+                         const int *omniTypes,
+                         const unsigned char *dataColumnsIds,
+                         bool isSplitWrite, long startPos, long endPos) {
+    std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;
+    for (int i = 0; i < colNums; ++i) {
+        if (!dataColumsIds[i]) {
+            continue;
+        }
+
+        auto vec = (BaseVector *)vecNativeId[i];
+        auto typeId = static_cast<DataTypeId>(omniTypes[i]);
+        switch (typeId) {
+            case OMNI_BOOLEAN:
+                chunks.emplace_back(buildBooleanChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_SHORT:
+                chunks.emplace_back(buildInt16Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_INT:
+                chunks.emplace_back(buildInt32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_LONG:
+                chunks.emplace_back(buildInt64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DATE32:
+                chunks.emplace_back(buildDATE32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DATE64:
+                chunks.emplace_back(buildDATE64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DOUBLE:
+                chunks.emplace_back(buildDOUBLEChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_VARCHAR:
+                chunks.emplace_back(buildVARCHARChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DECIMAL64:
+                chunks.emplace_back(buildDECIMAL64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DECIMAL128:
+                chunks.emplace_back(buildDECIMAL128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
+                break;
+            default:
+                throw std::runtime_error(
+                    "Native columnar write not support for this type: " + typedId)
+
+        }
+    }
+    auto numRows = (chunks,empty() ? 0 : chunks[0]->length());
+
+    auto table = arrow::Table::Make(schema_, std::move(chunks), numRows);
+    PARQUET_THROW_NOT_OK(arrow_writer->WriterTable(*table));
+    PARQUET_THROW_NOT_OK(arrow_writer->Close());
+}
+
+}//namespace omniruntime::writer
--
Gitee

From f12e6e9cc6e709e358769515f181f0e5ebfe73cb Mon Sep 17 00:00:00 2001
From: caojiazhi <352139040@qq.com>
Date: Thu, 21 Nov 2024 16:38:03 +0800
Subject: [PATCH 3/6] Parquet table write migration writing error from yellow develop

---
 .../src/jni/ParquetColumnarBatchJniWriter.cpp |  17 +-
 .../src/jni/ParquetColumnarBatchJniWriter.h   |   4 +-
 .../cpp/src/parquet/ParquetWrier.h            |  17 +-
 .../cpp/src/parquet/ParquetWriter.cpp         | 148 ++++++++++--------
 .../OmniFileFormatDataWriter.scala            |  15 +-
 .../parquet/ OmniParquetOutputWriter.scala    |   2 +-
 6 files changed, 108 insertions(+), 95 deletions(-)

diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
index 5c822e8b2..9dceae8d5 100644
--- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
+++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp
@@ -35,8 +35,8 @@ static constexpr int32_t DECIMAL_SCALE_INDEX = 1;
 //定义全局的ParquetWriter指针
 ParquetWriter *pWriter = nullptr;
 
-JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter
-    (JNIEnv* env, jobject jObj, jobject jsonObj)
+JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter(JNIEnv* env,
+    jobject jObj, jobject jsonObj)
 {
     JNI_FUNC_START
     //Get uriStr
@@ -79,7 +79,7 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJ
 }
 
 JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema
-    (JNIEnv* env, jobject obj, jobject job, long writer, jobjectArray fileNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam){
+    (JNIEnv* env, jobject JObj, long writer, jobjectArray fileNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam){
     if (pWriter == nullptr) {
         pWriter = new ParquetWriter();
     }
@@ -97,14 +97,14 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn
         jboolean nullable = nullablesPtr[i]
         jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames,i);
         const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr);
-        std::shared_ptr<DataType> writeParquetType;
+        std::shared_ptr<arrow::DataType> writeParquetType;
         if (static_cast<Type::type>(parquetType) == Type::type DECIMAL) {
             auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i);
             auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray,JNI_FALSE);
             auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX];
             pWriter->precision=decimalParamArrayPtr[DECIMAL_PRECISION_INDEX];
             auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];
-            pWriter->scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];;
+            pWriter->scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];
             writeParquetType = decimal128(precision, scale);
         } else {
             switch(static_cast<Type::type>(parquetType)) {
@@ -146,7 +146,8 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn
     JNI_FUNC_END_VOID(runtimeExceptionClass)
 }
 
-JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_write(
+JNIEXPORT void JNICALL
+Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows) { JNI_FUNC_START @@ -161,7 +162,7 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn } -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_splitWrite( +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds jlong startPos, jlong endPos) { JNI_FUNC_START @@ -176,7 +177,7 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn env->ReleaseLongArrayElements(vecNativeId, vecNativeIdPtr, 0); env->ReleaseIntArrayElements(omniTypes, omniTypesPtr, 0); env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0); - JNI_FUNC_END(runtimeExceptionClass) + JNI_FUNC_END_VOID(runtimeExceptionClass) } diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h index cca9b9b9f..0cac9a7be 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h @@ -43,11 +43,11 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema (JNIEnv* env, jobject jObj, long writer, jobjectArray filedNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam); -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_write( +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows); -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema_splitWrite( +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds jlong startPos, jlong endPos); diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h index 33fac3e78..3a5215e6e 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h @@ -23,27 +23,28 @@ #endif // NATIVE_READER_PARQUETWRITER_H #include -#incldue +#include #include "common/UriInfo.h" -#include "parquet/arrow/writer" +#include "parquet/arrow/writer.h" using namespace arrow::internal; namespace omniruntime::writer { class ParquetWriter { -pubilc: +public: ParquetWriter() {} arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi) std::share_ptr BuildField(const std::string& name, int typeId, bool nullable) - void write(long *vetNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds, - bool isSplitWrite = false, long starPos = 0, long endPos = 0); + void write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds, + bool 
isSplitWrite = false, long starPos = 0, + long endPos = 0); void write(); -pubilc: +public: std::unique_ptr arrow_writer; std::shared_ptr schema_; - int precision; - int scale; + std::vector precisions; + std::vector scales; }; } \ No newline at end of file diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp index e2b8cfa4c..b4ec12071 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp @@ -17,31 +17,32 @@ * limitations under the License. */ -#include "ParquetWrier.h" -#incldue "ParquetReade.h" -#incldue "arrow/array/array_base.h" -#incldue "arrow/array/array_binary.h" -#incldue "arrow/array/array_primitive.h" -#incldue "arrow/array/data.h" +#include "ParquetWriter.h" +#include "ParquetReader.h" +#include "arrow/array/array_base.h" +#include "arrow/array/array_binary.h" +#include "arrow/array/array_primitive.h" +#include "arrow/array/data.h" #include #include #include -#incldue "arrow/util/bitmap.h" -#incldue "arrow/chunked_array.h" -#incldue "arrow/buffer_builder.h" -#incldue "arrow/table.h" -#incldue "arrowadapter/FileSystemAdpter.h" -#incldue "common/UriInfo.h" -#incldue "jni/jin_common.h" -#incldue "parquet/arrow/reader.h" -#incldue "parquet/exception.h" -#incldue "parquet/properties.h" +#include "arrow/util/bitmap.h" +#include "arrow/chunked_array.h" +#include "arrow/buffer_builder.h" +#include "arrow/table.h" +#include "arrowadapter/FileSystemAdapter.h" +#include "common/UriInfo.h" +#include "jni/jni_common.h" +#include "parquet/arrow/reader.h" +#include "parquet/exception.h" +#include "parquet/properties.h" #include -#incldue -#incldue -#incldue -#incldue -#incldue +#include +#include +#include +#include +#include + using namespace arrow; using namespace arrow::internal; @@ -53,7 +54,7 @@ static std::mutex mutex_; namespace omniruntime::writer { -// std::string GetReaderAddr(const std::string address) { +// std::string GetReadAddr(const std::string address) { // std::string prefix = // "hdfs://OmniOperator:9000/user/hive/warehouse/" // "tpcds_bin_partitioned_varchar_orc_2.db/test_parquet_int"; @@ -62,13 +63,13 @@ namespace omniruntime::writer { // return prefix + suffix; // } - arrow::Status ParquetWrier::InitRecordWriter(UriInfo &uri, std:: &ugi) { + arrow::Status ParquetWriter::InitRecordWriter(UriInfo &uri, std::string &ugi) { // Configure writer settings - parquet:WriterProperities::Builder writer_properties; + parquet::WriterProperties::Builder writer_properties; //Configure Arrow-specific reader settings - parquet:ArrowWriterProperities::Builder arrow_writer_properties; + parquet::ArrowWriterProperties::Builder arrow_writer_properties; std::shared_ptr outputStream; @@ -82,7 +83,7 @@ namespace omniruntime::writer { } std::string path = uri.ToString(); - ARROW_ASSIGN_OR_RAISE(ouputStream, fs->filesys_ptr->OpenOutputStream(path)); + ARROW_ASSIGN_OR_RAISE(outputStream, fs->filesys_ptr->OpenOutputStream(path)); writer_properties.disable_dictionary(); @@ -100,7 +101,7 @@ namespace omniruntime::writer { } //std::shared_ptr -//ParquetWrier::BuildField(const std::string &name, int typeId, bool nullable) { +//ParquetWriter::BuildField(const std::string &name, int typeId, bool nullable) { // switch (typeId) { // case Type::BOOL: // return std::make_shared(name, std::make_shared(), @@ -156,7 +157,7 @@ std::shared_ptr<::arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, Base 
arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize); bitmap.SetBitsTo(true); - //todo why use vector-> Hashll before setbitto will make other colums unormal + //todo why use vector-> Hashnull before setbitto will make other colums unnormal for (long j =startPos; j< endPos; j++) { values[index]=vector->GetValue(j); @@ -181,10 +182,10 @@ std::shared_ptr<::arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, Base auto arrayData = arrow::ArrayData::Make(booleanType, vectorSize, buffers); std::vector> arrayVector; - auto booleanArray = std::make_shared(arrayData); + auto booleanArray = std::make_shared(arrayData); arrayVector.emplace_back(booleanArray); - return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie; + return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie(); } std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVector *baseVector, @@ -220,7 +221,7 @@ std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVe builder.Resize(vectorSize); builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); @@ -229,11 +230,11 @@ std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVe auto int16Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(int16Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto numericArray = std::make_shared>(arrayData); arrayVector.emplace_back(numericArray); - return arrow::ChunkedArray::Make(arrayVector, int16Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, int16Type).ValueOrDie(); } std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVector *baseVector, @@ -269,7 +270,7 @@ std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVe builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; @@ -279,14 +280,14 @@ std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVe auto int32Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(int32Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto numericArray = std::make_shared>(arrayData); arrayVector.emplace_back(numericArray); - return arrow::ChunkedArray::Make(arrayVector, int32Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, int32Type).ValueOrDie(); } -std::shared_ptr buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector, +std::shared_ptr<::arrow::ChunkedArray> buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector, bool isSplitWrite = false, long startPos = 0, long endPos = 0) { using T = typename NativeType::type; @@ -320,7 +321,7 @@ std::shared_ptr buildInt64Chunk(DataTypeId typeId, BaseVect builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; @@ -330,14 +331,14 @@ std::shared_ptr buildInt64Chunk(DataTypeId typeId, BaseVect auto int64Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(int64Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto numericArray = std::make_shared>(arrayData); arrayVector.emplace_back(numericArray); - return 
arrow::ChunkedArray::Make(arrayVector, int64Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, int64Type).ValueOrDie(); } -std::shared_ptr buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector, +std::shared_ptr buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector, bool isSplitWrite = false, long startPos = 0, long endPos = 0) { using T = typename NativeType::type; @@ -371,26 +372,26 @@ std::shared_ptr buildDate32Chunk(DataTypeId typeId, BaseVec builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); buffers.emplace_back(dataBuffer); - auto date32Type = std::make_shared(); + auto date32Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(date32Type, vectorSize, buffers); //todo Array or arrow::Array - std::vector> arrayVector; - auto date32Array = std::make_shared>(arrayData); + std::vector> arrayVector; + auto date32Array = std::make_shared>(arrayData); arrayVector.emplace_back(date32Array); - return arrow::ChunkedArray::Make(arrayVector, int16Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, date32Type).ValueOrDie(); } -std::shared_ptr buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector, +std::shared_ptr buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector, bool isSplitWrite = false, long startPos = 0, long endPos = 0) { using T = typename NativeType::type; @@ -422,7 +423,7 @@ std::shared_ptr buildDate64Chunk(DataTypeId typeId, BaseVec builder.Resize(vectorSize); builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); @@ -431,11 +432,11 @@ std::shared_ptr buildDate64Chunk(DataTypeId typeId, BaseVec auto date64Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(date64Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto date64Array = std::make_shared>(arrayData); arrayVector.emplace_back(date64Array); - return arrow::ChunkedArray::Make(arrayVector, date64Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, date64Type).ValueOrDie(); } std::shared_ptr buildDoubleChunk(DataTypeId typeId, BaseVector *baseVector, @@ -471,7 +472,7 @@ std::shared_ptr buildDoubleChunk(DataTypeId typeId, BaseVec builder.Append(values, vectorSize); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); @@ -480,14 +481,14 @@ std::shared_ptr buildDoubleChunk(DataTypeId typeId, BaseVec auto doubleType = std::make_shared(); auto arrayData = arrow::ArrayData::Make(doubleType, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto doubleArray = std::make_shared>(arrayData); arrayVector.emplace_back(doubleArray); - return arrow::ChunkedArray::Make(arrayVector, doubleType).ValueOrDie; + return ChunkedArray::Make(arrayVector, doubleType).ValueOrDie(); } -std::shared_ptr buildVarcharChunk(DataTypeId typeId, BaseVector *baseVector, +std::shared_ptr buildVarcharChunk(DataTypeId typeId, BaseVector* baseVector, bool isSplitWrite = false, long startPos = 0, long endPos = 0) { auto vector = dynamic_cast> *>(baseVector); @@ -505,7 +506,7 @@ std::shared_ptr buildVarcharChunk(DataTypeId typeId, BaseVe TypedBufferBuilder offsetsBuilder; 
TypedBufferBuilder valuesBuilder; - int32_t currentOffset = 0; + int32_t current_offset = 0; offsetsBuilder.Append(0); valuesBuilder.Resize(vectorSize); @@ -538,11 +539,11 @@ std::shared_ptr buildVarcharChunk(DataTypeId typeId, BaseVe auto utf8Type = std::make_shared(); auto arrayData = arrow::ArrayData::Make(utf8Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto stringArray = std::make_shared(arrayData); arrayVector.emplace_back(stringArray); - return arrow::ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie(); } //arrow parquet don't have Decimal64 Type ,use Decimal128 instead of it @@ -579,7 +580,7 @@ std::shared_ptr buildDecimal64Chunk(DataTypeId typeId, Base builder.Append(decimalArray.data(), decimalArray.size()*16); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); @@ -588,15 +589,15 @@ std::shared_ptr buildDecimal64Chunk(DataTypeId typeId, Base auto decimal128Type = std::make_shared(precision,scale); auto arrayData = arrow::ArrayData::Make(Decimal128Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto decimal128Array = std::make_shared(arrayData); arrayVector.emplace_back(decimal128Array); - return arrow::ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie(); } -std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector, int precision, int scale, +std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector, int precision, int scale, bool isSplitWrite = false, long startPos = 0, long endPos = 0) { using T = typename NativeType::type; @@ -631,7 +632,7 @@ std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, Bas builder.Append(decimalArray.data(), decimalArray.size()*16); auto maybe_buffer = builder.Finish(); - std::shared_ptr dataBuffer = *maybe_buffer; + std::shared_ptr dataBuffer = *maybe_buffer; std::vector> buffers; buffers.emplace_back(bitmapBuffer); @@ -640,20 +641,23 @@ std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, Bas auto decimal128Type = std::make_shared(precision,scale); auto arrayData = arrow::ArrayData::Make(Decimal128Type, vectorSize, buffers); - std::vector> arrayVector; + std::vector> arrayVector; auto decimal128Array = std::make_shared(arrayData); arrayVector.emplace_back(decimal128Array); - return arrow::ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie; + return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie(); } -void ParquetWrier::write(long * vecNativeId, Int colNums, +void ParquetWriter::write(long * vecNativeId, Int colNums, const int *omniTypes, const unsigned char *dataColumnsIds, bool isSplitWrite, long startPos, long endPos) { std::vector> chunks; + int decimalIndex = 0; + int precision = 0; + int scale =0 ; for (int i = 0; i < colNums; ++i) { - if (!dataColumsIds[i]) { + if (!dataColumnsIds[i]) { continue; } @@ -685,21 +689,27 @@ void ParquetWrier::write(long * vecNativeId, Int colNums, chunks.emplace_back(buildVARCHARChunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_DECIMAL64: + precision = precisions[decimalIndex]; + scale = scales[decimalIndex]; chunks.emplace_back(buildDECIMAL64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); + decimalIndex++; break; case OMNI_DECIMAL128: + precision = 
precisions[decimalIndex]; + scale = scales[decimalIndex]; chunks.emplace_back(buildDECIMAL128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); + decimalIndex++; break; default: throw std::runtime_error( - "Native columnar write not support for this type: " + typedId) + "Native columnar write not support for this type: " + typeId); } } - auto numRows = (chunks,empty() ? 0 : chunks[0]->length()); + auto numRows = (chunks.empty() ? 0 : chunks[0]->length()); auto table = arrow::Table::Make(schema_, std::move(chunks), numRows); - PARQUET_THROW_NOT_OK(arrow_writer->WriterTable(*table)); + PARQUET_THROW_NOT_OK(arrow_writer->WriteTable(*table)); PARQUET_THROW_NOT_OK(arrow_writer->Close()); } diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala index 78fdb1aab..22422021b 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala @@ -157,9 +157,6 @@ abstract class OmniBaseDynamicPartitionDataWriter( row => proj(row).getInt(0) } - /** Returns the data columns to be written given an input row */ - protected val getOutputRow = - UnsafeProjection.create(description.dataColumns, description.allColumns) protected def getPartitionPath(partitionValues: Option[InternalRow], bucketId: Option[Int]): String = { @@ -190,6 +187,10 @@ abstract class OmniBaseDynamicPartitionDataWriter( currentPath } + /** Returns the data columns to be written given an input row */ + protected val getOutputRow = + UnsafeProjection.create(description.dataColumns, description.allColumns) + /** * Opens a new OutputWriter given a partition key and/or a bucket id. 
* If bucket id is specified, we will append it to the end of the file name, but before the @@ -282,14 +283,14 @@ abstract class OmniBaseDynamicPartitionDataWriter( // a abstract interface named OmniOutPutWriter currentWriter match { case _: OmniParquetOutputWriter => - currentWriter.asInstanceOf[OmniParquetOutputWriter] - .initialize(description.allColumns, description.dataColumns) + assert(currentWriter.asInstanceOf[OmniParquetOutputWriter]) + currentWriter.asInstanceOf[OmniParquetOutputWriter].splitWriter(record,startPos,endPos) case _: OmniOrcOutputWriter => + assert(currentWriter.asInstanceOf[OmniOrcOutputWriter]).splitWriter(record,startPos,endPos) currentWriter.asInstanceOf[OmniOrcOutputWriter] - .initialize(description.allColumns, description.dataColumns) case _ => throw new UnsupportedOperationException - (s"Unsupported ${currentWriter.getClass} Output writer!") + (s"writeRecord Unsupported ${currentWriter.getClass} Output writer!") } statsTrackers.foreach(_.newRow(currentWriter.path, record)) diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala index 6d8653d52..813f6088a 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala @@ -38,7 +38,7 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType, val writer = new ParquetColumnarBatchWriter() var omniTypes: Array[Int] = new Array[Int](0) var dataColumnsIds: Array[Boolean] = new Array[Boolean](0) - var allOmniTypes: Array[int] new Array[Int](0) + var allOmniTypes: Array[int] = new Array[Int](0) def initialize(allColumns: Seq[Attribute], dataColumns: Seq[Attribute]): Unit = { val filePath = new Path(path) -- Gitee From 3104490fddf2ea821340a905dfa341bc2697b101 Mon Sep 17 00:00:00 2001 From: caojiazhi <352139040@qq.com> Date: Thu, 21 Nov 2024 22:34:58 +0800 Subject: [PATCH 4/6] Parquet table write fix bug --- .../src/jni/ParquetColumnarBatchJniWriter.cpp | 14 +++---- .../src/jni/ParquetColumnarBatchJniWriter.h | 2 +- .../cpp/src/parquet/ParquetWriter.cpp | 22 +++++------ .../{ParquetWrier.h => ParquetWriter.h} | 4 +- .../jni/ParquetColumnarBatchJniWriter.java | 37 +++++++++++++++++++ ...r.java => ParquetColumnarBatchWriter.java} | 1 - .../OmniFileFormatDataWriter.scala | 8 ++-- .../parquet/ OmniParquetOutputWriter.scala | 6 +-- 8 files changed, 65 insertions(+), 29 deletions(-) rename omnioperator/omniop-native-reader/cpp/src/parquet/{ParquetWrier.h => ParquetWriter.h} (91%) create mode 100644 omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java rename omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/{ ParquetColumnarBatchWriter.java => ParquetColumnarBatchWriter.java} (99%) diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp index 9dceae8d5..8faa1a3b5 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp @@ 
-79,7 +79,7 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJ } JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema - (JNIEnv* env, jobject JObj, long writer, jobjectArray fileNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam){ + (JNIEnv* env, jobject JObj, long writer, jobjectArray fieldNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam){ if (pWriter == nullptr) { pWriter = new ParquetWriter(); } @@ -94,17 +94,17 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn FieldVector fieldVector; for (int i =0 ;i < schemeLength; i++) { jint parquetType = fieldTypesPtr[i]; - jboolean nullable = nullablesPtr[i] + jboolean nullable = nullablesPtr[i]; jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames,i); const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr); std::shared_ptr writeParquetType; - if (static_cast(parquetType) == Type::type DECIMAL) { + if (static_cast(parquetType) == Type::type::DECIMAL) { auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i); auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray,JNI_FALSE); auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX]; - pWriter->precision=decimalParamArrayPtr[DECIMAL_PRECISION_INDEX]; + pWriter->precisions.push_back(precision); auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX]; - pWriter->scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX]; + pWriter->scales.push_back(scale); writeParquetType = decimal128(precision, scale); } else { switch(static_cast(parquetType)) { @@ -158,12 +158,12 @@ Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE); pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr); - JNI_FUNC_END(runtimeExceptionClass) + JNI_FUNC_END_VOID(runtimeExceptionClass) } JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds + JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, jlong startPos, jlong endPos) { JNI_FUNC_START auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE); diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h index 0cac9a7be..0755f4916 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h @@ -48,7 +48,7 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows); JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds + JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, jlong startPos, jlong endPos); #ifdef __cplusplus diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp 
b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp index b4ec12071..1f1b0606b 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp @@ -41,7 +41,7 @@ #include #include #include -#include + using namespace arrow; @@ -587,7 +587,7 @@ std::shared_ptr buildDecimal64Chunk(DataTypeId typeId, Base buffers.emplace_back(dataBuffer); auto decimal128Type = std::make_shared(precision,scale); - auto arrayData = arrow::ArrayData::Make(Decimal128Type, vectorSize, buffers); + auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers); std::vector> arrayVector; auto decimal128Array = std::make_shared(arrayData); @@ -619,7 +619,7 @@ std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, BaseVector long index=0; for (long j =startPos; j< endPos; j++) { - auto decimalValue=vector->GetValue(j) + auto decimalValue=vector->GetValue(j); BasicDecimal128 basicDecimal128(vector->GetValue(j).HighBits(),vector->GetValue(j).LowBits()); decimalArray.emplace_back(BasicDecimal128(basicDecimal128)); @@ -639,7 +639,7 @@ std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, BaseVector buffers.emplace_back(dataBuffer); auto decimal128Type = std::make_shared(precision,scale); - auto arrayData = arrow::ArrayData::Make(Decimal128Type, vectorSize, buffers); + auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers); std::vector> arrayVector; auto decimal128Array = std::make_shared(arrayData); @@ -648,7 +648,7 @@ std::shared_ptr buildDecimal128Chunk(DataTypeId typeId, BaseVector return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie(); } -void ParquetWriter::write(long * vecNativeId, Int colNums, +void ParquetWriter::write(long * vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds, bool isSplitWrite, long startPos, long endPos) { @@ -677,27 +677,27 @@ void ParquetWriter::write(long * vecNativeId, Int colNums, chunks.emplace_back(buildInt64Chunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_DATE32: - chunks.emplace_back(buildDATE32Chunk(typeId, vec, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildDate32Chunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_DATE64: - chunks.emplace_back(buildDATE64Chunk(typeId, vec, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildDate64Chunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_DOUBLE: - chunks.emplace_back(buildDOUBLEChunk(typeId, vec, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildDoubleChunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_VARCHAR: - chunks.emplace_back(buildVARCHARChunk(typeId, vec, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildVarcharChunk(typeId, vec, isSplitWrite, startPos, endPos)); break; case OMNI_DECIMAL64: precision = precisions[decimalIndex]; scale = scales[decimalIndex]; - chunks.emplace_back(buildDECIMAL64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildDecimal64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); decimalIndex++; break; case OMNI_DECIMAL128: precision = precisions[decimalIndex]; scale = scales[decimalIndex]; - chunks.emplace_back(buildDECIMAL128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); + chunks.emplace_back(buildDecimal128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos)); decimalIndex++; break; default: diff 
--git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h similarity index 91% rename from omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h rename to omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h index 3a5215e6e..8e5259b3a 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWrier.h +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h @@ -34,8 +34,8 @@ class ParquetWriter { public: ParquetWriter() {} - arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi) - std::share_ptr BuildField(const std::string& name, int typeId, bool nullable) + arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi); + std::shared_ptr BuildField(const std::string& name, int typeId, bool nullable); void write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds, bool isSplitWrite = false, long starPos = 0, long endPos = 0); diff --git a/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java b/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java new file mode 100644 index 000000000..704cac09d --- /dev/null +++ b/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.huawei.boostkit.write.jni; + +import com.huawei.boostkit.scan.jni.NativeReaderLoader; + +import org.json.JSONObject; + +public class ParquetColumnarBatchJniWriter { + public ParquetColumnarBatchJniWriter() { + NativeReaderLoader.getInstance(); + } + + public native long initializeWriter(JSONObject var1); + + public native void initializeSchema(long writer, String[] fieldNames, int[] fieldTypes, boolean[] nullables, int[][]decimalParam); + + public native void write(long writer, long[] vecNativeId, int[] omniTypes, boolean[] dataColumnsIds, int rowNums); + + public native void splitWrite(long writer, long[] vecNativeId, int[] omniTypes, boolean[] dataColumnsIds, long starPos, long endPos); +} \ No newline at end of file diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ ParquetColumnarBatchWriter.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java similarity index 99% rename from omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ ParquetColumnarBatchWriter.java rename to omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java index c253d6963..fd5ba784a 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ ParquetColumnarBatchWriter.java +++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java @@ -45,7 +45,6 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.types.VarcharType; import org.apache.spark.sql.vectorized.ColumnarBatch; import org.json.JSONObject; -import org.apache.spark.sql.catalyst.util.RebaseDateTime; import java.io.IOException; import java.net.URI; diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala index 22422021b..5adbd6c5d 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala @@ -283,11 +283,11 @@ abstract class OmniBaseDynamicPartitionDataWriter( // a abstract interface named OmniOutPutWriter currentWriter match { case _: OmniParquetOutputWriter => - assert(currentWriter.asInstanceOf[OmniParquetOutputWriter]) - currentWriter.asInstanceOf[OmniParquetOutputWriter].splitWriter(record,startPos,endPos) + assert(currentWriter.isInstanceOf[OmniParquetOutputWriter]) + currentWriter.asInstanceOf[OmniParquetOutputWriter].spiltWrite(record,startPos,endPos) case _: OmniOrcOutputWriter => - assert(currentWriter.asInstanceOf[OmniOrcOutputWriter]).splitWriter(record,startPos,endPos) - currentWriter.asInstanceOf[OmniOrcOutputWriter] + assert(currentWriter.isInstanceOf[OmniOrcOutputWriter]) + currentWriter.asInstanceOf[OmniOrcOutputWriter].spiltWrite(record,startPos,endPos) case _ => throw new UnsupportedOperationException (s"writeRecord Unsupported ${currentWriter.getClass} Output writer!") diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala 
b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala index 813f6088a..92f99717f 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala @@ -25,7 +25,7 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.datasources.{OmniFakeRow, OutputWriter} +import org.apache.spark.sql.execution.datasources.{OmniInternalRow, OutputWriter} import org.apache.spark.sql.types.StructType import java.net.URI @@ -38,7 +38,7 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType, val writer = new ParquetColumnarBatchWriter() var omniTypes: Array[Int] = new Array[Int](0) var dataColumnsIds: Array[Boolean] = new Array[Boolean](0) - var allOmniTypes: Array[int] = new Array[Int](0) + var allOmniTypes: Array[Int] = new Array[Int](0) def initialize(allColumns: Seq[Attribute], dataColumns: Seq[Attribute]): Unit = { val filePath = new Path(path) @@ -58,7 +58,7 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType, override def write(row: InternalRow): Unit = { assert(row.isInstanceOf[OmniInternalRow]) - writer.write(omniTypes, dataColumnsIds, row.asInstanceOf[OmniFakeRow].batch) + writer.write(omniTypes, dataColumnsIds, row.asInstanceOf[OmniInternalRow].batch) } def spiltWrite(row: InternalRow, startPos: Long, endPos: Long): Unit = { -- Gitee From 2e2f1ad419bef805071e7de27dbc14477bfd612b Mon Sep 17 00:00:00 2001 From: caojiazhi <352139040@qq.com> Date: Fri, 22 Nov 2024 15:36:49 +0800 Subject: [PATCH 5/6] Parquet table write codecheck --- .editorconfig | 0 .../src/jni/ParquetColumnarBatchJniWriter.cpp | 250 ++-- .../src/jni/ParquetColumnarBatchJniWriter.h | 27 +- .../cpp/src/parquet/ParquetWriter.cpp | 1039 ++++++++--------- .../cpp/src/parquet/ParquetWriter.h | 37 +- .../spark/jni/ParquetColumnarBatchWriter.java | 18 +- .../OmniFileFormatDataWriter.scala | 26 +- .../parquet/ OmniParquetOutputWriter.scala | 14 +- .../parquet/OmniParquetFileFormat.scala | 22 +- 9 files changed, 714 insertions(+), 719 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..e69de29bb diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp index 8faa1a3b5..87f4fd494 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp @@ -1,5 +1,5 @@ /** -* Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,6 @@ * limitations under the License. 
*/ - #include "ParquetColumnarBatchJniWriter.h" #include "jni_common.h" #include "parquet/ParquetWriter.h" @@ -32,139 +31,150 @@ using namespace arrow; static constexpr int32_t DECIMAL_PRECISION_INDEX = 0; static constexpr int32_t DECIMAL_SCALE_INDEX = 1; -//定义全局的ParquetWriter指针 +// 定义全局的ParquetWriter指针 ParquetWriter *pWriter = nullptr; -JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter(JNIEnv* env, - jobject jObj, jobject jsonObj) +JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter( + JNIEnv *env, jobject jObj, jobject jsonObj) { - JNI_FUNC_START - //Get uriStr - jstring uri = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("uri")); - const char *uriStr = env->GetStringUTFChars(uri, JNI_FALSE); - std::string uriString(uriStr); - env->ReleaseStringUTFChars(uri,uriStr); - - jstring ugiTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("ugi")); - const char *ugi = env->GetStringUTFChars(ugiTemp, JNI_FALSE); - std::string ugiString(ugi); - env->ReleaseStringUTFChars(ugiTemp, ugi); - - jstring schemeTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("scheme")); - const char *scheme = env->GetStringUTFChars(schemeTemp, JNI_FALSE); - std::string schemeString(scheme); - env->ReleaseStringUTFChars(schemeTemp, scheme); - - jstring hostTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("host")); - const char *host = env->GetStringUTFChars(hostTemp, JNI_FALSE); - std::string hostString(host); - env->ReleaseStringUTFChars(hostTemp, host); - - jstring pathTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("path")); - const char *path = env->GetStringUTFChars(pathTemp, JNI_FALSE); - std::string pathString(path); - env->ReleaseStringUTFChars(pathTemp, path); - - jint port = (jint)env->CallIntMethod(jsonObj, jsonMethodInt, env->NewStringUTF("port")); - - UriInfo uriInfo(uriString, schemeString, pathString, hostString, std::to_string(port)); - - auto state = pWriter->InitRecordWriter(uriInfo, ugiString); - if (state != arrow::Status::OK()) { - env->ThrowNew(runtimeExceptionClass, state.ToString().c_str()); - return 0; - } - return (jlong)(pWriter); - JNI_FUNC_END(runtimeExceptionClass) + JNI_FUNC_START + // Get uriStr + jstring uri = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("uri")); + const char *uriStr = env->GetStringUTFChars(uri, JNI_FALSE); + std::string uriString(uriStr); + env->ReleaseStringUTFChars(uri, uriStr); + + jstring ugiTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("ugi")); + const char *ugi = env->GetStringUTFChars(ugiTemp, JNI_FALSE); + std::string ugiString(ugi); + env->ReleaseStringUTFChars(ugiTemp, ugi); + + jstring schemeTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("scheme")); + const char *scheme = env->GetStringUTFChars(schemeTemp, JNI_FALSE); + std::string schemeString(scheme); + env->ReleaseStringUTFChars(schemeTemp, scheme); + + jstring hostTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("host")); + const char *host = env->GetStringUTFChars(hostTemp, JNI_FALSE); + std::string hostString(host); + env->ReleaseStringUTFChars(hostTemp, host); + + jstring pathTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("path")); + const char *path = 
env->GetStringUTFChars(pathTemp, JNI_FALSE); + std::string pathString(path); + env->ReleaseStringUTFChars(pathTemp, path); + + jint port = (jint)env->CallIntMethod(jsonObj, jsonMethodInt, env->NewStringUTF("port")); + + UriInfo uriInfo(uriString, schemeString, pathString, hostString, std::to_string(port)); + + auto state = pWriter->InitRecordWriter(uriInfo, ugiString); + if (state != arrow::Status::OK()) + { + env->ThrowNew(runtimeExceptionClass, state.ToString().c_str()); + return 0; + } + return (jlong)(pWriter); + JNI_FUNC_END(runtimeExceptionClass) } -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema - (JNIEnv* env, jobject JObj, long writer, jobjectArray fieldNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam){ - if (pWriter == nullptr) { - pWriter = new ParquetWriter(); - } - JNI_FUNC_START - auto fieldTypesPtr = env->GetIntArrayElements(fieldTypes, JNI_FALSE); - auto nullablesPtr = env->GetBooleanArrayElements(nullables, JNI_FALSE); - if (fieldTypesPtr == NULL) { - throw std::runtime_error("Parquet type ids should not be null"); +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema( + JNIEnv *env, jobject JObj, long writer, jobjectArray fieldNames, jintArray fieldTypes, + jbooleanArray nullables, jobjectArray decimalParam) +{ + if (pWriter == nullptr) + { + pWriter = new ParquetWriter(); + } + JNI_FUNC_START + auto fieldTypesPtr = env->GetIntArrayElements(fieldTypes, JNI_FALSE); + auto nullablesPtr = env->GetBooleanArrayElements(nullables, JNI_FALSE); + if (fieldTypesPtr == NULL) + { + throw std::runtime_error("Parquet type ids should not be null"); + } + auto schemeLength = (int32_t)env->GetArrayLength(fieldTypes); + + FieldVector fieldVector; + for (int i = 0; i < schemeLength; i++) + { + jint parquetType = fieldTypesPtr[i]; + jboolean nullable = nullablesPtr[i]; + jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames, i); + const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr); + std::shared_ptr writeParquetType; + if (static_cast(parquetType) == Type::type::DECIMAL) + { + auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i); + auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray, JNI_FALSE); + auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX]; + pWriter->precisions.push_back(precision); + auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX]; + pWriter->scales.push_back(scale); + writeParquetType = decimal128(precision, scale); } - auto schemeLength = (int32_t)env->GetArrayLength(fieldTypes); - - FieldVector fieldVector; - for (int i =0 ;i < schemeLength; i++) { - jint parquetType = fieldTypesPtr[i]; - jboolean nullable = nullablesPtr[i]; - jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames,i); - const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr); - std::shared_ptr writeParquetType; - if (static_cast(parquetType) == Type::type::DECIMAL) { - auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i); - auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray,JNI_FALSE); - auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX]; - pWriter->precisions.push_back(precision); - auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX]; - pWriter->scales.push_back(scale); - writeParquetType = decimal128(precision, scale); - } else { - switch(static_cast(parquetType)) { - case Type::type::BOOL: - 
writeParquetType = arrow::boolean(); - break; - case Type::type::INT16: - writeParquetType = arrow::int16(); - break; - case Type::type::INT32: - writeParquetType = arrow::int32(); - break; - case Type::type::INT64: - writeParquetType = arrow::int64(); - break; - case Type::type::DATE32: - writeParquetType = arrow::date32(); - break; - case Type::type::DATE64: - writeParquetType = arrow::date64(); - break; - case Type::type::DOUBLE: - writeParquetType = arrow::float64(); - break; - case Type::type::STRING: - writeParquetType = arrow::utf8(); - break; - default: - throw std::invalid_argument("Unsupported parquet type"); - } + else + { + switch (static_cast(parquetType)) + { + case Type::type::BOOL: + writeParquetType = arrow::boolean(); + break; + case Type::type::INT16: + writeParquetType = arrow::int16(); + break; + case Type::type::INT32: + writeParquetType = arrow::int32(); + break; + case Type::type::INT64: + writeParquetType = arrow::int64(); + break; + case Type::type::DATE32: + writeParquetType = arrow::date32(); + break; + case Type::type::DATE64: + writeParquetType = arrow::date64(); + break; + case Type::type::DOUBLE: + writeParquetType = arrow::float64(); + break; + case Type::type::STRING: + writeParquetType = arrow::utf8(); + break; + default: + throw std::invalid_argument("Unsupported parquet type"); } - auto t = field(cFieldName, writeParquetType, nullable); - fieldVector.emplace_back(t); - env->ReleaseStringUTFChars(fieldName,cFieldName); } - auto t = std::make_unique(fieldVector); - pWriter->schema_ = std::make_shared(fieldVector); + auto t = field(cFieldName, writeParquetType, nullable); + fieldVector.emplace_back(t); + env->ReleaseStringUTFChars(fieldName, cFieldName); + } + auto t = std::make_unique(fieldVector); + pWriter->schema_ = std::make_shared(fieldVector); - JNI_FUNC_END_VOID(runtimeExceptionClass) + JNI_FUNC_END_VOID(runtimeExceptionClass) } JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, - jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows) { - JNI_FUNC_START - ParquetWriter *pWriter = (ParquetWriter *)writer; - auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE); - auto colNums = env->GetArrayLength(vecNativeId); - auto omniTypesPtr = env->GetIntArrayElements(omniTypes, JNI_FALSE); - auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE); - pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr); - - JNI_FUNC_END_VOID(runtimeExceptionClass) + JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, + jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows) +{ + JNI_FUNC_START + ParquetWriter *pWriter = (ParquetWriter *)writer; + auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE); + auto colNums = env->GetArrayLength(vecNativeId); + auto omniTypesPtr = env->GetIntArrayElements(omniTypes, JNI_FALSE); + auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE); + pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr); - } + JNI_FUNC_END_VOID(runtimeExceptionClass) +} JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, - jlong startPos, jlong endPos) { + JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, 
jbooleanArray dataColumnsIds, + jlong startPos, jlong endPos) +{ JNI_FUNC_START auto vecNativeIdPtr = env->GetLongArrayElements(vecNativeId, JNI_FALSE); auto colNums = env->GetArrayLength(vecNativeId); @@ -178,6 +188,4 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn env->ReleaseIntArrayElements(omniTypes, omniTypesPtr, 0); env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0); JNI_FUNC_END_VOID(runtimeExceptionClass) - } - diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h index 0755f4916..a281f0371 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h @@ -1,5 +1,5 @@ /** -* Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -33,23 +33,24 @@ #include "common/debug.h" #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif + JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter + (JNIEnv *env, jobject jObj, jobject job); -JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter - (JNIEnv* env, jobject jObj, jobject job); + JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema + (JNIEnv *env, jobject jObj, long writer, jobjectArray filedNames, jintArray fieldTypes, + jbooleanArray nullables, jobjectArray decimalParam); -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema - (JNIEnv* env, jobject jObj, long writer, jobjectArray filedNames, jintArray fieldTypes, jbooleanArray nullables, jobjectArray decimalParam); + JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( + JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, + jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows); -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, - jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows); - -JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( - JNIEnv* env,jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, jbooleanArray dataColumnsIds, - jlong startPos, jlong endPos); + JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite( + JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes, + jbooleanArray dataColumnsIds, jlong startPos, jlong endPos); #ifdef __cplusplus } diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp index 1f1b0606b..fe559c62b 100644 --- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp @@ -1,5 +1,5 @@ /** -* Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved. 
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
@@ -42,8 +42,6 @@
 #include
 #include
-
-
 using namespace arrow;
 using namespace arrow::internal;
 using namespace parquet::arrow;
@@ -52,665 +50,652 @@ using namespace omniruntime::reader;

 static std::mutex mutex_;

-namespace omniruntime::writer {
+namespace omniruntime::writer
+{

-// std::string GetReadAddr(const std::string address) {
-//   std::string prefix =
-//       "hdfs://OmniOperator:9000/user/hive/warehouse/"
-//       "tpcds_bin_partitioned_varchar_orc_2.db/test_parquet_int";
-//   auto pos = address.find_last_of('/');
-//   std::string suffix = address.substr(pos);
-//   return prefix + suffix;
-// }
-
-arrow::Status ParquetWriter::InitRecordWriter(UriInfo &uri, std::string &ugi) {
+    arrow::Status ParquetWriter::InitRecordWriter(UriInfo &uri, std::string &ugi)
+    {
         // Configure writer settings
         parquet::WriterProperties::Builder writer_properties;
-    //Configure Arrow-specific reader settings
+        // Configure Arrow-specific writer settings
         parquet::ArrowWriterProperties::Builder arrow_writer_properties;

         std::shared_ptr<arrow::io::OutputStream> outputStream;

-    //Get the file from filesystem
+        // Get the file from the filesystem
         arrow::Status result;
         mutex_.lock();
         Filesystem *fs = GetFileSystemPtr(uri, ugi, result);
         mutex_.unlock();
-    if (fs == nullptr || fs->filesys_ptr == nullptr) {
-        return arrow::Status::IOError(result);
+        if (fs == nullptr || fs->filesys_ptr == nullptr)
+        {
+            return arrow::Status::IOError(result);
         }

-    std::string path = uri.ToString();
-    ARROW_ASSIGN_OR_RAISE(outputStream, fs->filesys_ptr->OpenOutputStream(path));
+        std::string path = uri.ToString();
+        ARROW_ASSIGN_OR_RAISE(outputStream, fs->filesys_ptr->OpenOutputStream(path));

-    writer_properties.disable_dictionary();
+        writer_properties.disable_dictionary();

-    // Temporarily use the default value of WriterProperties and
-    auto fileWriter = FileWriter::Open(
-        *schema_,arrow::default_memory_pool(), outputStream,
-        writer_properties.build(),parquet::default_arrow_writer_properties());
+        // Temporarily use the default WriterProperties values
+        auto fileWriter = FileWriter::Open(
+            *schema_, arrow::default_memory_pool(), outputStream,
+            writer_properties.build(), parquet::default_arrow_writer_properties());

-    ARROW_ASSIGN_OR_RAISE(arrow_writer, fileWriter);
+        ARROW_ASSIGN_OR_RAISE(arrow_writer, fileWriter);

-    // ARROW_RETURN_NOT_OK(fileWriter.ValueOrDie()->Close());
-    auto pool = arrow::default_memory_pool();
+        // ARROW_RETURN_NOT_OK(fileWriter.ValueOrDie()->Close());
+        auto pool = arrow::default_memory_pool();

-    return arrow::Status::OK();
+        return arrow::Status::OK();
     }

-//std::shared_ptr<arrow::Field>
-//ParquetWriter::BuildField(const std::string &name, int typeId, bool nullable) {
-//  switch (typeId) {
-//  case Type::BOOL:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::BooleanType>(),
-//                                          nullable);
-//  case Type::INT16:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Int16Type>(),
-//                                          nullable);
-//  case Type::INT32:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Int32Type>(),
-//                                          nullable);
-//  case Type::INT64:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Int64Type>(),
-//                                          nullable);
-//  case Type::DATE32:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Date32Type>(),
-//                                          nullable);
-//  case Type::DATE64:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Date64Type>(),
-//                                          nullable);
-//  case Type::DOUBLE:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::DoubleType>(),
-//                                          nullable);
-//  case Type::STRING:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::StringType>(),
-//                                          nullable);
-//  case Type::DECIMAL64:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Decimal128Type>(38,2),
-//                                          nullable);
-//
-//  case Type::DECIMAL128:
-//    return std::make_shared<arrow::Field>(name, std::make_shared<arrow::Decimal128Type>(38,2),
-//                                          nullable);
-//  default:
-//    throw parquet::ParquetException("Un supported parquet type", typeId);
-//  }
-//}
-
-std::shared_ptr<::arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, BaseVector *baseVector,
-                                                         bool isSplitWrite = false, long startPos = 0,
-                                                         long endPos = 0) {
-    using T = typename NativeType<OMNI_BOOLEAN>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<::arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, BaseVector *baseVector,
+                                                             bool isSplitWrite = false, long startPos = 0,
+                                                             long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_BOOLEAN>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    bool values[vectorSize];
-    long index=0;
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-    //todo why use vector-> Hashnull before setbitto will make other colums unnormal
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        bool values[vectorSize];
+        long index = 0;
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        // TODO: figure out why calling vector->HasNull() before SetBitTo corrupts other columns
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<bool> builder;
-    builder.Resize(vectorSize);
+        TypedBufferBuilder<bool> builder;
+        builder.Resize(vectorSize);

-    builder.Append(reinterpret_cast<uint8_t *>(values), vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> databuffer = *maybe_buffer;
+        builder.Append(reinterpret_cast<uint8_t *>(values), vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> databuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(databuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(databuffer);

-    auto booleanType = std::make_shared<arrow::BooleanType>();
-    auto arrayData = arrow::ArrayData::Make(booleanType, vectorSize, buffers);
+        auto booleanType = std::make_shared<arrow::BooleanType>();
+        auto arrayData = arrow::ArrayData::Make(booleanType, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto booleanArray = std::make_shared<arrow::BooleanArray>(arrayData);
-    arrayVector.emplace_back(booleanArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto booleanArray = std::make_shared<arrow::BooleanArray>(arrayData);
+        arrayVector.emplace_back(booleanArray);

-    return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie();
+        return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie();
     }
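Aside: every build*Chunk helper in this file follows the same three-step recipe -- allocate a validity bitmap and clear one bit per null slot, copy the raw values into an Arrow buffer, then wrap both buffers in an ArrayData of the matching Arrow type. Below is a minimal standalone sketch of that recipe reduced to std::vector input; the function name and signature are illustrative, not part of the patch, and it assumes the same Arrow C++ API the patch uses. Note that AllocateBitmap returns uninitialized memory, so the SetBitsTo(true) pass is what makes "all valid" the default.

    #include <arrow/api.h>
    #include <arrow/util/bitmap.h>
    #include <vector>

    // Sketch: build a one-chunk ChunkedArray of int32 with explicit nulls.
    arrow::Result<std::shared_ptr<arrow::ChunkedArray>> BuildInt32ChunkSketch(
        const std::vector<int32_t> &values, const std::vector<bool> &isNull)
    {
        const int64_t n = static_cast<int64_t>(values.size());
        // Validity bitmap: start all-valid, then clear the bit for each null slot.
        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> validity, arrow::AllocateBitmap(n));
        arrow::internal::Bitmap bitmap(validity, 0, n);
        bitmap.SetBitsTo(true);
        for (int64_t i = 0; i < n; ++i) {
            if (isNull[i]) {
                bitmap.SetBitTo(i, false);
            }
        }
        // Data buffer: a contiguous copy of the raw values.
        arrow::TypedBufferBuilder<int32_t> builder;
        ARROW_RETURN_NOT_OK(builder.Append(values.data(), n));
        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> data, builder.Finish());
        // For primitive layouts, buffer 0 is validity and buffer 1 is data.
        auto arrayData = arrow::ArrayData::Make(arrow::int32(), n, {validity, data});
        return arrow::ChunkedArray::Make({arrow::MakeArray(arrayData)});
    }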
-std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                       bool isSplitWrite = false, long startPos = 0,
-                                                       long endPos = 0) {
-    using T = typename NativeType<OMNI_SHORT>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                           bool isSplitWrite = false, long startPos = 0,
+                                                           long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_SHORT>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    short values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        short values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<int16_t> builder;
-    builder.Resize(vectorSize);
-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        TypedBufferBuilder<int16_t> builder;
+        builder.Resize(vectorSize);
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto int16Type = std::make_shared<arrow::Int16Type>();
-    auto arrayData = arrow::ArrayData::Make(int16Type, vectorSize, buffers);
+        auto int16Type = std::make_shared<arrow::Int16Type>();
+        auto arrayData = arrow::ArrayData::Make(int16Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto numericArray = std::make_shared<NumericArray<Int16Type>>(arrayData);
-    arrayVector.emplace_back(numericArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto numericArray = std::make_shared<NumericArray<Int16Type>>(arrayData);
+        arrayVector.emplace_back(numericArray);

-    return ChunkedArray::Make(arrayVector, int16Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, int16Type).ValueOrDie();
     }

-std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                       bool isSplitWrite = false, long startPos = 0,
-                                                       long endPos = 0) {
-    using T = typename NativeType<OMNI_INT>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                           bool isSplitWrite = false, long startPos = 0,
+                                                           long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_INT>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    int values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        int values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<int32_t> builder;
-    builder.Resize(vectorSize);
+        TypedBufferBuilder<int32_t> builder;
+        builder.Resize(vectorSize);

-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto int32Type = std::make_shared<arrow::Int32Type>();
-    auto arrayData = arrow::ArrayData::Make(int32Type, vectorSize, buffers);
+        auto int32Type = std::make_shared<arrow::Int32Type>();
+        auto arrayData = arrow::ArrayData::Make(int32Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto numericArray = std::make_shared<NumericArray<Int32Type>>(arrayData);
-    arrayVector.emplace_back(numericArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto numericArray = std::make_shared<NumericArray<Int32Type>>(arrayData);
+        arrayVector.emplace_back(numericArray);

-    return ChunkedArray::Make(arrayVector, int32Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, int32Type).ValueOrDie();
     }
-std::shared_ptr<::arrow::ChunkedArray> buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                       bool isSplitWrite = false, long startPos = 0,
-                                                       long endPos = 0) {
-    using T = typename NativeType<OMNI_LONG>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<::arrow::ChunkedArray> buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                           bool isSplitWrite = false, long startPos = 0,
+                                                           long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_LONG>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    int64_t values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-    //todo why use vector-> Hashll before setbitto will make other colums unormal
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        int64_t values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        // TODO: figure out why calling vector->HasNull() before SetBitTo corrupts other columns
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<int64_t> builder;
-    builder.Resize(vectorSize);
+        TypedBufferBuilder<int64_t> builder;
+        builder.Resize(vectorSize);

-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto int64Type = std::make_shared<arrow::Int64Type>();
-    auto arrayData = arrow::ArrayData::Make(int64Type, vectorSize, buffers);
+        auto int64Type = std::make_shared<arrow::Int64Type>();
+        auto arrayData = arrow::ArrayData::Make(int64Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto numericArray = std::make_shared<NumericArray<Int64Type>>(arrayData);
-    arrayVector.emplace_back(numericArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto numericArray = std::make_shared<NumericArray<Int64Type>>(arrayData);
+        arrayVector.emplace_back(numericArray);

-    return ChunkedArray::Make(arrayVector, int64Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, int64Type).ValueOrDie();
     }
-std::shared_ptr<arrow::ChunkedArray> buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                      bool isSplitWrite = false, long startPos = 0,
-                                                      long endPos = 0) {
-    using T = typename NativeType<OMNI_DATE32>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<arrow::ChunkedArray> buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                          bool isSplitWrite = false, long startPos = 0,
+                                                          long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_DATE32>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    int values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        int values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<int32_t> builder;
-    builder.Resize(vectorSize);
+        TypedBufferBuilder<int32_t> builder;
+        builder.Resize(vectorSize);

-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto date32Type = std::make_shared<arrow::Date32Type>();
-    auto arrayData = arrow::ArrayData::Make(date32Type, vectorSize, buffers);
+        auto date32Type = std::make_shared<arrow::Date32Type>();
+        auto arrayData = arrow::ArrayData::Make(date32Type, vectorSize, buffers);

-    //todo Array or arrow::Array
-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto date32Array = std::make_shared<NumericArray<Date32Type>>(arrayData);
-    arrayVector.emplace_back(date32Array);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto date32Array = std::make_shared<NumericArray<Date32Type>>(arrayData);
+        arrayVector.emplace_back(date32Array);

-    return ChunkedArray::Make(arrayVector, date32Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, date32Type).ValueOrDie();
     }

-std::shared_ptr<arrow::ChunkedArray> buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                      bool isSplitWrite = false, long startPos = 0,
-                                                      long endPos = 0) {
-    using T = typename NativeType<OMNI_DATE64>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<arrow::ChunkedArray> buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                          bool isSplitWrite = false, long startPos = 0,
+                                                          long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_DATE64>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    int64_t values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        int64_t values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<int64_t> builder;
-    builder.Resize(vectorSize);
-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        TypedBufferBuilder<int64_t> builder;
+        builder.Resize(vectorSize);
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto date64Type = std::make_shared<arrow::Date64Type>();
-    auto arrayData = arrow::ArrayData::Make(date64Type, vectorSize, buffers);
+        auto date64Type = std::make_shared<arrow::Date64Type>();
+        auto arrayData = arrow::ArrayData::Make(date64Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto date64Array = std::make_shared<NumericArray<Date64Type>>(arrayData);
-    arrayVector.emplace_back(date64Array);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto date64Array = std::make_shared<NumericArray<Date64Type>>(arrayData);
+        arrayVector.emplace_back(date64Array);

-    return ChunkedArray::Make(arrayVector, date64Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, date64Type).ValueOrDie();
     }
-std::shared_ptr<arrow::ChunkedArray> buildDoubleChunk(DataTypeId typeId, BaseVector *baseVector,
-                                                      bool isSplitWrite = false, long startPos = 0,
-                                                      long endPos = 0) {
-    using T = typename NativeType<OMNI_DOUBLE>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<arrow::ChunkedArray> buildDoubleChunk(DataTypeId typeId, BaseVector *baseVector,
+                                                          bool isSplitWrite = false, long startPos = 0,
+                                                          long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_DOUBLE>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    double values[vectorSize];
-    long index=0;
-
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-    for (long j =startPos; j< endPos; j++) {
-        values[index]=vector->GetValue(j);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        double values[vectorSize];
+        long index = 0;
+
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        for (long j = startPos; j < endPos; j++)
+        {
+            values[index] = vector->GetValue(j);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    TypedBufferBuilder<double> builder;
-    builder.Resize(vectorSize);
+        TypedBufferBuilder<double> builder;
+        builder.Resize(vectorSize);

-    builder.Append(values, vectorSize);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(values, vectorSize);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto doubleType = std::make_shared<arrow::DoubleType>();
-    auto arrayData = arrow::ArrayData::Make(doubleType, vectorSize, buffers);
+        auto doubleType = std::make_shared<arrow::DoubleType>();
+        auto arrayData = arrow::ArrayData::Make(doubleType, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto doubleArray = std::make_shared<NumericArray<DoubleType>>(arrayData);
-    arrayVector.emplace_back(doubleArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto doubleArray = std::make_shared<NumericArray<DoubleType>>(arrayData);
+        arrayVector.emplace_back(doubleArray);

-    return ChunkedArray::Make(arrayVector, doubleType).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, doubleType).ValueOrDie();
     }

-std::shared_ptr<arrow::ChunkedArray> buildVarcharChunk(DataTypeId typeId, BaseVector* baseVector,
-                                                       bool isSplitWrite = false, long startPos = 0,
-                                                       long endPos = 0) {
-    auto vector = dynamic_cast<Vector<LargeStringContainer<std::string_view>> *>(baseVector);
+    std::shared_ptr<arrow::ChunkedArray> buildVarcharChunk(DataTypeId typeId, BaseVector *baseVector,
+                                                           bool isSplitWrite = false, long startPos = 0,
+                                                           long endPos = 0)
+    {
+        auto vector = dynamic_cast<Vector<LargeStringContainer<std::string_view>> *>(baseVector);

-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-
-    TypedBufferBuilder<int32_t> offsetsBuilder;
-    TypedBufferBuilder<char> valuesBuilder;
-    int32_t current_offset = 0;
-    offsetsBuilder.Append(0);
-    valuesBuilder.Resize(vectorSize);
-
-    long index=0;
-    for (long j =startPos; j< endPos; j++) {
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        TypedBufferBuilder<int32_t> offsetsBuilder;
+        TypedBufferBuilder<char> valuesBuilder;
+        int32_t current_offset = 0;
+        offsetsBuilder.Append(0);
+        valuesBuilder.Resize(vectorSize);
+
+        long index = 0;
+        for (long j = startPos; j < endPos; j++)
+        {
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
+            std::string strValue = std::string(vector->GetValue(j));
+            const char *cStr = strValue.c_str();
+            size_t length = strValue.length();
+            char *charArray = new char[length + 1];
+            strcpy(charArray, cStr);
+            current_offset += length;
+            offsetsBuilder.Append(current_offset);
+            valuesBuilder.Append(charArray, length);
+            delete[] charArray;
         }
-        index++;
-        std::string strValue = std::string(vector->GetValue(j));
-        const char* cStr = strValue.c_str();
-        size_t length=strValue.length();
-        char* charArray = new char[length+1];
-        strcpy(charArray, cStr);
-        current_offset += length;
-        offsetsBuilder.Append(current_offset);
-        valuesBuilder.Append(charArray,length);
-        delete [] charArray;
-    }
-    auto offsetsBuffer = offsetsBuilder.Finish().ValueOrDie();
-    auto valuesBuffer = valuesBuilder.Finish().ValueOrDie();
+        auto offsetsBuffer = offsetsBuilder.Finish().ValueOrDie();
+        auto valuesBuffer = valuesBuilder.Finish().ValueOrDie();

-    std::vector<std::shared_ptr<Buffer>> buffers;
+        std::vector<std::shared_ptr<Buffer>> buffers;

-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(offsetsBuffer);
-    buffers.emplace_back(valuesBuffer);
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(offsetsBuffer);
+        buffers.emplace_back(valuesBuffer);

-    auto utf8Type = std::make_shared<arrow::StringType>();
-    auto arrayData = arrow::ArrayData::Make(utf8Type, vectorSize, buffers);
+        auto utf8Type = std::make_shared<arrow::StringType>();
+        auto arrayData = arrow::ArrayData::Make(utf8Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto stringArray = std::make_shared<arrow::StringArray>(arrayData);
-    arrayVector.emplace_back(stringArray);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto stringArray = std::make_shared<arrow::StringArray>(arrayData);
+        arrayVector.emplace_back(stringArray);

-    return ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie();
     }
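For variable-width strings the layout needs a third buffer: int32 offsets (n + 1 entries, starting at 0) in front of the concatenated UTF-8 bytes, which is exactly what buildVarcharChunk assembles by hand above. The same result can be had from arrow::StringBuilder, which also skips the byte copy for null slots (the hand-rolled loop above still appends a slot's bytes even when it is null). A sketch with an illustrative name, assuming the same Arrow API:

    #include <arrow/api.h>
    #include <string>
    #include <vector>

    // Sketch: the validity/offsets/values layout produced via StringBuilder.
    arrow::Result<std::shared_ptr<arrow::ChunkedArray>> BuildUtf8ChunkSketch(
        const std::vector<std::string> &values, const std::vector<bool> &isNull)
    {
        arrow::StringBuilder builder;
        for (size_t i = 0; i < values.size(); ++i) {
            if (isNull[i]) {
                ARROW_RETURN_NOT_OK(builder.AppendNull());  // offset repeats, no bytes written
            } else {
                ARROW_RETURN_NOT_OK(builder.Append(values[i]));
            }
        }
        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> array, builder.Finish());
        return arrow::ChunkedArray::Make({array});
    }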
-//arrow parquet don't have Decimal64 Type ,use Decimal128 instead of it
-std::shared_ptr<arrow::ChunkedArray> buildDecimal64Chunk(DataTypeId typeId, BaseVector *baseVector, int precision, int scale,
-                                                         bool isSplitWrite = false, long startPos = 0,
-                                                         long endPos = 0) {
-    using T = typename NativeType<OMNI_DECIMAL64>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    // Arrow Parquet has no Decimal64 type; use Decimal128 instead
+    std::shared_ptr<arrow::ChunkedArray> buildDecimal64Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                             int precision, int scale, bool isSplitWrite = false,
+                                                             long startPos = 0, long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_DECIMAL64>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-    BufferBuilder builder;
-    builder.Resize(vectorSize);
-    std::vector<BasicDecimal128> decimalArray;
-
-    long index=0;
-    for (long j =startPos; j< endPos; j++) {
-
-        BasicDecimal128 basicDecimal128(0,vector->GetValue(j));
-        decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        BufferBuilder builder;
+        builder.Resize(vectorSize);
+        std::vector<BasicDecimal128> decimalArray;
+
+        long index = 0;
+        for (long j = startPos; j < endPos; j++)
+        {
+
+            BasicDecimal128 basicDecimal128(0, vector->GetValue(j));
+            decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    builder.Append(decimalArray.data(), decimalArray.size()*16);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(decimalArray.data(), decimalArray.size() * 16);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto decimal128Type = std::make_shared<arrow::Decimal128Type>(precision,scale);
-    auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);
+        auto decimal128Type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+        auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto decimal128Array = std::make_shared<arrow::Decimal128Array>(arrayData);
-    arrayVector.emplace_back(decimal128Array);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto decimal128Array = std::make_shared<arrow::Decimal128Array>(arrayData);
+        arrayVector.emplace_back(decimal128Array);

-    return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
     }
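One detail worth flagging: buildDecimal64Chunk widens each 64-bit unscaled value with BasicDecimal128(0, value), i.e. a zero high word. That encoding is only correct for non-negative values; a negative unscaled decimal needs its sign extended into the high 64 bits, which Arrow's int64_t constructor does automatically. A two-line sketch of the difference (illustrative name, not part of the patch):

    #include <arrow/util/decimal.h>
    #include <cstdint>

    // Sketch: widening an OMNI_DECIMAL64 unscaled value to 128 bits.
    arrow::Decimal128 WidenDecimal64(int64_t unscaled)
    {
        // The int64_t constructor sign-extends into the high 64 bits;
        // BasicDecimal128(0, unscaled) would encode e.g. -1 as 2^64 - 1
        // with a zero high word.
        return arrow::Decimal128(unscaled);
    }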
-std::shared_ptr<arrow::ChunkedArray> buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector, int precision, int scale,
-                                                          bool isSplitWrite = false, long startPos = 0,
-                                                          long endPos = 0) {
-    using T = typename NativeType<OMNI_DECIMAL128>::type;
-    auto vector = (Vector<T> *) baseVector;
-
-    if (!isSplitWrite) {
-        startPos = 0;
-        endPos = vector->GetSize();
+    std::shared_ptr<arrow::ChunkedArray> buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector,
+                                                              int precision, int scale, bool isSplitWrite = false,
+                                                              long startPos = 0, long endPos = 0)
+    {
+        using T = typename NativeType<OMNI_DECIMAL128>::type;
+        auto vector = (Vector<T> *)baseVector;
+
+        if (!isSplitWrite)
+        {
+            startPos = 0;
+            endPos = vector->GetSize();
         }
-    long vectorSize = endPos - startPos;
-    auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-    arrow::internal::Bitmap bitmap(bitmapBuffer,0,vectorSize);
-    bitmap.SetBitsTo(true);
-
-    BufferBuilder builder;
-    builder.Resize(vectorSize);
-    std::vector<BasicDecimal128> decimalArray;
-
-    long index=0;
-    for (long j =startPos; j< endPos; j++) {
-        auto decimalValue=vector->GetValue(j);
-        BasicDecimal128 basicDecimal128(vector->GetValue(j).HighBits(),vector->GetValue(j).LowBits());
-        decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
-
-        decimalArray.back().Rescale(2,2);
-        if (vector->IsNull(j)) {
-            bitmap.SetBitTo(index,false);
+        long vectorSize = endPos - startPos;
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+
+        BufferBuilder builder;
+        builder.Resize(vectorSize);
+        std::vector<BasicDecimal128> decimalArray;
+
+        long index = 0;
+        for (long j = startPos; j < endPos; j++)
+        {
+            auto decimalValue = vector->GetValue(j);
+            BasicDecimal128 basicDecimal128(vector->GetValue(j).HighBits(), vector->GetValue(j).LowBits());
+            decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
+
+            decimalArray.back().Rescale(2, 2);
+            if (vector->IsNull(j))
+            {
+                bitmap.SetBitTo(index, false);
+            }
+            index++;
         }
-        index++;
-    }

-    builder.Append(decimalArray.data(), decimalArray.size()*16);
-    auto maybe_buffer = builder.Finish();
-    std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
+        builder.Append(decimalArray.data(), decimalArray.size() * 16);
+        auto maybe_buffer = builder.Finish();
+        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;

-    std::vector<std::shared_ptr<Buffer>> buffers;
-    buffers.emplace_back(bitmapBuffer);
-    buffers.emplace_back(dataBuffer);
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);

-    auto decimal128Type = std::make_shared<arrow::Decimal128Type>(precision,scale);
-    auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);
+        auto decimal128Type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+        auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);

-    std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-    auto decimal128Array = std::make_shared<arrow::Decimal128Array>(arrayData);
-    arrayVector.emplace_back(decimal128Array);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto decimal128Array = std::make_shared<arrow::Decimal128Array>(arrayData);
+        arrayVector.emplace_back(decimal128Array);

-    return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
+        return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
     }
-void ParquetWriter::write(long * vecNativeId, int colNums,
-                          const int *omniTypes,
-                          const unsigned char *dataColumnsIds,
-                          bool isSplitWrite, long startPos, long endPos) {
-    std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;
-    int decimalIndex = 0;
-    int precision = 0;
-    int scale =0 ;
-    for (int i = 0; i < colNums; ++i) {
-        if (!dataColumnsIds[i]) {
-            continue;
-        }
-
-        auto vec = (BaseVector *)vecNativeId[i];
-        auto typeId = static_cast<DataTypeId>(omniTypes[i]);
-        switch (typeId) {
-        case OMNI_BOOLEAN:
-            chunks.emplace_back(buildBooleanChunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_SHORT:
-            chunks.emplace_back(buildInt16Chunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_INT:
-            chunks.emplace_back(buildInt32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_LONG:
-            chunks.emplace_back(buildInt64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_DATE32:
-            chunks.emplace_back(buildDate32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_DATE64:
-            chunks.emplace_back(buildDate64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_DOUBLE:
-            chunks.emplace_back(buildDoubleChunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_VARCHAR:
-            chunks.emplace_back(buildVarcharChunk(typeId, vec, isSplitWrite, startPos, endPos));
-            break;
-        case OMNI_DECIMAL64:
-            precision = precisions[decimalIndex];
-            scale = scales[decimalIndex];
-            chunks.emplace_back(buildDecimal64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
-            decimalIndex++;
-            break;
-        case OMNI_DECIMAL128:
-            precision = precisions[decimalIndex];
-            scale = scales[decimalIndex];
-            chunks.emplace_back(buildDecimal128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
-            decimalIndex++;
-            break;
-        default:
-            throw std::runtime_error(
-                "Native columnar write not support for this type: " + typeId);
-
+    void ParquetWriter::write(long *vecNativeId, int colNums,
+                              const int *omniTypes,
+                              const unsigned char *dataColumnsIds,
+                              bool isSplitWrite, long startPos, long endPos)
+    {
+        std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;
+        int decimalIndex = 0;
+        int precision = 0;
+        int scale = 0;
+        for (int i = 0; i < colNums; ++i)
+        {
+            if (!dataColumnsIds[i])
+            {
+                continue;
+            }
+
+            auto vec = (BaseVector *)vecNativeId[i];
+            auto typeId = static_cast<DataTypeId>(omniTypes[i]);
+            switch (typeId)
+            {
+            case OMNI_BOOLEAN:
+                chunks.emplace_back(buildBooleanChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_SHORT:
+                chunks.emplace_back(buildInt16Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_INT:
+                chunks.emplace_back(buildInt32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_LONG:
+                chunks.emplace_back(buildInt64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DATE32:
+                chunks.emplace_back(buildDate32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DATE64:
+                chunks.emplace_back(buildDate64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DOUBLE:
+                chunks.emplace_back(buildDoubleChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_VARCHAR:
+                chunks.emplace_back(buildVarcharChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                break;
+            case OMNI_DECIMAL64:
+                precision = precisions[decimalIndex];
+                scale = scales[decimalIndex];
+                chunks.emplace_back(buildDecimal64Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
+                decimalIndex++;
+                break;
+            case OMNI_DECIMAL128:
+                precision = precisions[decimalIndex];
+                scale = scales[decimalIndex];
+                chunks.emplace_back(buildDecimal128Chunk(typeId, vec, precision, scale, isSplitWrite, startPos, endPos));
+                decimalIndex++;
+                break;
+            default:
+                throw std::runtime_error(
+                    "Native columnar write does not support this type: " + std::to_string(typeId));
+            }
         }
-    }

-    auto numRows = (chunks.empty() ? 0 : chunks[0]->length());
+        auto numRows = (chunks.empty() ? 0 : chunks[0]->length());

-    auto table = arrow::Table::Make(schema_, std::move(chunks), numRows);
-    PARQUET_THROW_NOT_OK(arrow_writer->WriteTable(*table));
-    PARQUET_THROW_NOT_OK(arrow_writer->Close());
-}
+        auto table = arrow::Table::Make(schema_, std::move(chunks), numRows);
+        PARQUET_THROW_NOT_OK(arrow_writer->WriteTable(*table));
+        PARQUET_THROW_NOT_OK(arrow_writer->Close());
+    }
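The tail of write() is the standard Arrow-to-Parquet flow: assemble the per-column ChunkedArrays into a Table, hand the Table to parquet::arrow::FileWriter, and close. Because write() calls Close() itself, each call finalizes the file; a writer meant to receive several batches would hold Close() back until the last one. A condensed sketch of the flow (the name and single-shot structure are illustrative, not the patch's API):

    #include <arrow/api.h>
    #include <arrow/io/api.h>
    #include <parquet/arrow/writer.h>

    // Sketch: one-shot "table in, Parquet file out", as write() does above.
    arrow::Status WriteTableOnce(const std::shared_ptr<arrow::Schema> &schema,
                                 std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks,
                                 std::shared_ptr<arrow::io::OutputStream> sink)
    {
        const int64_t numRows = chunks.empty() ? 0 : chunks[0]->length();
        auto table = arrow::Table::Make(schema, std::move(chunks), numRows);
        ARROW_ASSIGN_OR_RAISE(auto writer,
                              parquet::arrow::FileWriter::Open(*schema, arrow::default_memory_pool(), sink,
                                                               parquet::default_writer_properties(),
                                                               parquet::default_arrow_writer_properties()));
        ARROW_RETURN_NOT_OK(writer->WriteTable(*table));
        return writer->Close();  // finalizes the footer; no further writes possible
    }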
-}//namespace omniruntime::writer
+} // namespace omniruntime::writer
diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
index 8e5259b3a..33631f4a8 100644
--- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
+++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
@@ -1,5 +1,5 @@
 /**
-* Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
@@ -28,23 +28,24 @@
 #include "parquet/arrow/writer.h"

 using namespace arrow::internal;
-
-namespace omniruntime::writer {
-class ParquetWriter {
-public:
-    ParquetWriter() {}
-
-    arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi);
-    std::shared_ptr<arrow::Field> BuildField(const std::string& name, int typeId, bool nullable);
-    void write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds,
-               bool isSplitWrite = false, long starPos = 0,
-               long endPos = 0);
-    void write();
-public:
-    std::unique_ptr<parquet::arrow::FileWriter> arrow_writer;
-    std::shared_ptr<arrow::Schema> schema_;
-    std::vector<int32_t> precisions;
-    std::vector<int32_t> scales;
+namespace omniruntime::writer
+{
+    class ParquetWriter
+    {
+    public:
+        ParquetWriter() {}
+
+        arrow::Status InitRecordWriter(UriInfo &uri, std::string &ugi);
+        std::shared_ptr<arrow::Field> BuildField(const std::string &name, int typeId, bool nullable);
+        void write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds,
+                   bool isSplitWrite = false, long startPos = 0, long endPos = 0);
+        void write();
+
+    public:
+        std::unique_ptr<parquet::arrow::FileWriter> arrow_writer;
+        std::shared_ptr<arrow::Schema> schema_;
+        std::vector<int32_t> precisions;
+        std::vector<int32_t> scales;
     };
 }
\ No newline at end of file
diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
index fd5ba784a..4a299a2b0 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
+++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
@@ -206,20 +206,20 @@ public class ParquetColumnarBatchWriter {
         }
     }

-    public void 
initializeSchemaJava(StructType dataSchema){ + public void initializeSchemaJava(StructType dataSchema) { int schemaLength = dataSchema.length(); - String [] fieldNames = new String[schemaLength]; + String[] fieldNames = new String[schemaLength]; int[] fieldTypes = new int[schemaLength]; boolean[] nullables = new boolean[schemaLength]; - String [] metaDataKeys = new String[schemaLength]; - String [] metaDataValues = new String[schemaLength]; - for (int i = 0; i < schemaLength; i++){ + String[] metaDataKeys = new String[schemaLength]; + String[] metaDataValues = new String[schemaLength]; + for (int i = 0; i < schemaLength; i++) { StructField field = dataSchema.fields()[i]; fieldNames[i] = field.name(); fieldTypes[i] = sparkTypeToParquetLibType(field.dataType()); nullables[i] = field.nullable(); } - jniWriter.initializeSchema(writer, fieldNames, fieldTypes, nullables,extractDecimalParam(dataSchema)); + jniWriter.initializeSchema(writer, fieldNames, fieldTypes, nullables, extractDecimalParam(dataSchema)); } public int sparkTypeToParquetLibType(DataType dataType) { @@ -229,7 +229,7 @@ public class ParquetColumnarBatchWriter { return ParquetLibTypeKind.INT16.ordinal(); } else if (dataType instanceof IntegerType) { IntegerType integerType = (IntegerType) dataType; - switch (integerType.defaultSize()){ + switch (integerType.defaultSize()) { case 1: return ParquetLibTypeKind.INT8.ordinal(); case 2: @@ -246,7 +246,7 @@ public class ParquetColumnarBatchWriter { return ParquetLibTypeKind.INT64.ordinal(); } else if (dataType instanceof DateType) { DateType dateType = (DateType) dataType; - switch (dateType.defaultSize()){ + switch (dateType.defaultSize()) { case 4: return ParquetLibTypeKind.DATE32.ordinal(); case 8: @@ -265,7 +265,7 @@ public class ParquetColumnarBatchWriter { return ParquetLibTypeKind.STRING.ordinal(); } else if (dataType instanceof DecimalType) { DecimalType decimalType = (DecimalType) dataType; - switch (decimalType.defaultSize()){ + switch (decimalType.defaultSize()) { case 8: return ParquetLibTypeKind.DECIMAL128.ordinal(); case 16: diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala index 5adbd6c5d..ccb312da1 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/OmniFileFormatDataWriter.scala @@ -64,8 +64,8 @@ class OmniSingleDirectoryDataWriter( currentWriter match { case _: OmniParquetOutputWriter => - currentWriter.asInstanceOf[OmniParquetOutputWriter] - .initialize(description.allColumns, description.dataColumns) + currentWriter.asInstanceOf[OmniParquetOutputWriter] + .initialize(description.allColumns, description.dataColumns) case _: OmniOrcOutputWriter => currentWriter.asInstanceOf[OmniOrcOutputWriter] .initialize(description.allColumns, description.dataColumns) @@ -243,8 +243,8 @@ abstract class OmniBaseDynamicPartitionDataWriter( currentWriter match { case _: OmniParquetOutputWriter => - currentWriter.asInstanceOf[OmniParquetOutputWriter] - .initialize(description.allColumns, description.dataColumns) + currentWriter.asInstanceOf[OmniParquetOutputWriter] + .initialize(description.allColumns, description.dataColumns) case _: OmniOrcOutputWriter => 
currentWriter.asInstanceOf[OmniOrcOutputWriter] .initialize(description.allColumns, description.dataColumns) @@ -282,15 +282,15 @@ abstract class OmniBaseDynamicPartitionDataWriter( // TODO After add OmniParquetOutPutWriter need extract // a abstract interface named OmniOutPutWriter currentWriter match { - case _: OmniParquetOutputWriter => - assert(currentWriter.isInstanceOf[OmniParquetOutputWriter]) - currentWriter.asInstanceOf[OmniParquetOutputWriter].spiltWrite(record,startPos,endPos) - case _: OmniOrcOutputWriter => - assert(currentWriter.isInstanceOf[OmniOrcOutputWriter]) - currentWriter.asInstanceOf[OmniOrcOutputWriter].spiltWrite(record,startPos,endPos) - case _ => - throw new UnsupportedOperationException - (s"writeRecord Unsupported ${currentWriter.getClass} Output writer!") + case _: OmniParquetOutputWriter => + assert(currentWriter.isInstanceOf[OmniParquetOutputWriter]) + currentWriter.asInstanceOf[OmniParquetOutputWriter].spiltWrite(record, startPos, endPos) + case _: OmniOrcOutputWriter => + assert(currentWriter.isInstanceOf[OmniOrcOutputWriter]) + currentWriter.asInstanceOf[OmniOrcOutputWriter].spiltWrite(record, startPos, endPos) + case _ => + throw new UnsupportedOperationException + (s"writeRecord Unsupported ${currentWriter.getClass} Output writer!") } statsTrackers.foreach(_.newRow(currentWriter.path, record)) diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala index 92f99717f..c5d9fa06e 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala +++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala @@ -49,10 +49,10 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType, omniTypes = omniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata).getId.ordinal() }) - allColumns.toStructType.foreach(field => { - allOmniTypes = allOmniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata) - .getId.ordinal() - }) + allColumns.toStructType.foreach(field => { + allOmniTypes = allOmniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata) + .getId.ordinal() + }) dataColumnsIds = allColumns.map(x => dataColumns.contains(x)).toArray } @@ -62,9 +62,9 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType, } def spiltWrite(row: InternalRow, startPos: Long, endPos: Long): Unit = { - assert(row.isInstanceOf[OmniInternalRow]) - writer.splitWrite(omniTypes, allOmniTypes, dataColumnsIds, - row.asInstanceOf[OmniInternalRow].batch, startPos, endPos) + assert(row.isInstanceOf[OmniInternalRow]) + writer.splitWrite(omniTypes, allOmniTypes, dataColumnsIds, + row.asInstanceOf[OmniInternalRow].batch, startPos, endPos) } override def close(): Unit = { diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala index 8add989e7..3bddde7fc 100644 --- a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala +++ 
b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/OmniParquetFileFormat.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.SerializableConfiguration import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.parquet.hadoop.{ParquetOutputCommitter,ParquetOutputFormat} +import org.apache.parquet.hadoop.{ParquetOutputCommitter, ParquetOutputFormat} import org.apache.parquet.hadoop.codec.CodecConfig import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel import org.apache.spark.sql.internal.SQLConf @@ -143,20 +143,20 @@ class OmniParquetFileFormat extends FileFormat with DataSourceRegister with Logg } override def inferSchema( - sparkSession: SparkSession, - parameters: Map[String, String], - files: Seq[FileStatus]): Option[StructType] = { + sparkSession: SparkSession, + parameters: Map[String, String], + files: Seq[FileStatus]): Option[StructType] = { ParquetUtils.inferSchema(sparkSession, parameters, files) } override def buildReaderWithPartitionValues( - sparkSession: SparkSession, - dataSchema: StructType, - partitionSchema: StructType, - requiredSchema: StructType, - filters: Seq[Filter], - options: Map[String, String], - hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) -- Gitee From 5081c157d5f4f379800d2476becd3b6d842379ef Mon Sep 17 00:00:00 2001 From: caojiazhi <352139040@qq.com> Date: Mon, 23 Dec 2024 21:28:30 +0800 Subject: [PATCH 6/6] fix parquet table write reviews --- .editorconfig | 0 .../cpp/src/CMakeLists.txt | 19 +- .../src/jni/ParquetColumnarBatchJniWriter.cpp | 104 ++-- .../src/jni/ParquetColumnarBatchJniWriter.h | 50 +- .../cpp/src/parquet/ParquetWriter.cpp | 515 ++++-------------- .../cpp/src/parquet/ParquetWriter.h | 5 +- .../jni/ParquetColumnarBatchJniWriter.java | 6 +- .../spark/jni/ParquetColumnarBatchWriter.java | 48 +- .../parquet/ OmniParquetOutputWriter.scala | 34 +- .../spark/TableWriteBasicFunctionSuite.scala | 107 +++- 10 files changed, 375 insertions(+), 513 deletions(-) delete mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index e69de29bb..000000000 diff --git a/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt b/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt index 7061e7da7..00d943777 100644 --- a/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt +++ b/omnioperator/omniop-native-reader/cpp/src/CMakeLists.txt @@ -1,9 +1,9 @@ include_directories(SYSTEM "/user/local/include") -set (PROJ_TARGET native_reader) +set(PROJ_TARGET native_reader) -set (SOURCE_FILES +set(SOURCE_FILES jni/OrcColumnarBatchJniWriter.cpp jni/OrcColumnarBatchJniReader.cpp jni/jni_common.cpp @@ -35,15 +35,15 @@ set (SOURCE_FILES #Find required protobuf package find_package(Protobuf REQUIRED) -if(PROTOBUF_FOUND) +if (PROTOBUF_FOUND) message(STATUS "protobuf library found") -else() +else () message(FATAL_ERROR "protobuf library is needed but cant be found") -endif() +endif () include_directories(${Protobuf_INCLUDE_DIRS}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) 
-add_library (${PROJ_TARGET} SHARED ${SOURCE_FILES} ${PROTO_SRCS} ${PROTO_HDRS} ${PROTO_SRCS_VB} ${PROTO_HDRS_VB}) +add_library(${PROJ_TARGET} SHARED ${SOURCE_FILES} ${PROTO_SRCS} ${PROTO_HDRS} ${PROTO_SRCS_VB} ${PROTO_HDRS_VB}) find_package(Arrow REQUIRED) find_package(Parquet REQUIRED) @@ -53,19 +53,18 @@ find_package(ArrowDataset REQUIRED) target_include_directories(${PROJ_TARGET} PUBLIC $ENV{JAVA_HOME}/include) target_include_directories(${PROJ_TARGET} PUBLIC $ENV{JAVA_HOME}/include/linux) target_include_directories(${PROJ_TARGET} PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -target_include_directories(${PROJ_TARGET} PUBLIC $ENV{OMNI_HOME}/lib/include) -target_link_libraries (${PROJ_TARGET} PUBLIC +target_link_libraries(${PROJ_TARGET} PUBLIC Arrow::arrow_shared ArrowDataset::arrow_dataset_shared Parquet::parquet_shared orc boostkit-omniop-vector-1.7.0-aarch64 hdfs - ) +) set_target_properties(${PROJ_TARGET} PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases + LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases ) install(TARGETS ${PROJ_TARGET} DESTINATION lib) diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp index 87f4fd494..4e0ffb6d7 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.cpp @@ -31,14 +31,10 @@ using namespace arrow; static constexpr int32_t DECIMAL_PRECISION_INDEX = 0; static constexpr int32_t DECIMAL_SCALE_INDEX = 1; -// 定义全局的ParquetWriter指针 -ParquetWriter *pWriter = nullptr; - -JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter( - JNIEnv *env, jobject jObj, jobject jsonObj) +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter( + JNIEnv *env, jobject jObj, jobject jsonObj, jlong writer) { JNI_FUNC_START - // Get uriStr jstring uri = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("uri")); const char *uriStr = env->GetStringUTFChars(uri, JNI_FALSE); std::string uriString(uriStr); @@ -49,10 +45,10 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJ std::string ugiString(ugi); env->ReleaseStringUTFChars(ugiTemp, ugi); - jstring schemeTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("scheme")); - const char *scheme = env->GetStringUTFChars(schemeTemp, JNI_FALSE); - std::string schemeString(scheme); - env->ReleaseStringUTFChars(schemeTemp, scheme); + jstring schemaTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("scheme")); + const char *schema = env->GetStringUTFChars(schemaTemp, JNI_FALSE); + std::string schemaString(schema); + env->ReleaseStringUTFChars(schemaTemp, schema); jstring hostTemp = (jstring)env->CallObjectMethod(jsonObj, jsonMethodString, env->NewStringUTF("host")); const char *host = env->GetStringUTFChars(hostTemp, JNI_FALSE); @@ -66,57 +62,45 @@ JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJ jint port = (jint)env->CallIntMethod(jsonObj, jsonMethodInt, env->NewStringUTF("port")); - UriInfo uriInfo(uriString, schemeString, pathString, hostString, std::to_string(port)); - - auto state = pWriter->InitRecordWriter(uriInfo, ugiString); - if (state != arrow::Status::OK()) - { - env->ThrowNew(runtimeExceptionClass, state.ToString().c_str()); - return 
0;
+    UriInfo uriInfo(uriString, schemaString, pathString, hostString, std::to_string(port));
+    ParquetWriter *pWriter = (ParquetWriter *)writer;
+    if (pWriter == nullptr) {
+        env->ThrowNew(runtimeExceptionClass, "the pWriter is null");
     }
-    return (jlong)(pWriter);
-    JNI_FUNC_END(runtimeExceptionClass)
+    pWriter->InitRecordWriter(uriInfo, ugiString);
+    JNI_FUNC_END_VOID(runtimeExceptionClass)
 }

-JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema(
-    JNIEnv *env, jobject JObj, long writer, jobjectArray fieldNames, jintArray fieldTypes,
+JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema(
+    JNIEnv *env, jobject JObj, jlong writer, jobjectArray fieldNames, jintArray fieldTypes,
     jbooleanArray nullables, jobjectArray decimalParam)
 {
-    if (pWriter == nullptr)
-    {
-        pWriter = new ParquetWriter();
-    }
     JNI_FUNC_START
+    auto pWriter = std::make_unique<ParquetWriter>();
     auto fieldTypesPtr = env->GetIntArrayElements(fieldTypes, JNI_FALSE);
     auto nullablesPtr = env->GetBooleanArrayElements(nullables, JNI_FALSE);
-    if (fieldTypesPtr == NULL)
-    {
+    if (fieldTypesPtr == NULL) {
         throw std::runtime_error("Parquet type ids should not be null");
     }
-    auto schemeLength = (int32_t)env->GetArrayLength(fieldTypes);
-
+    auto schemaLength = (int32_t)env->GetArrayLength(fieldTypes);
     FieldVector fieldVector;
-    for (int i = 0; i < schemeLength; i++)
-    {
+    for (int i = 0; i < schemaLength; i++) {
         jint parquetType = fieldTypesPtr[i];
         jboolean nullable = nullablesPtr[i];
         jstring fieldName = (jstring)env->GetObjectArrayElement(fieldNames, i);
         const char *cFieldName = env->GetStringUTFChars(fieldName, nullptr);
         std::shared_ptr<arrow::DataType> writeParquetType;
-        if (static_cast<Type::type>(parquetType) == Type::type::DECIMAL)
-        {
+
         auto decimalParamArray = (jintArray)env->GetObjectArrayElement(decimalParam, i);
         auto decimalParamArrayPtr = env->GetIntArrayElements(decimalParamArray, JNI_FALSE);
         auto precision = decimalParamArrayPtr[DECIMAL_PRECISION_INDEX];
-        pWriter->precisions.push_back(precision);
         auto scale = decimalParamArrayPtr[DECIMAL_SCALE_INDEX];
-        pWriter->scales.push_back(scale);
-        writeParquetType = decimal128(precision, scale);
-        }
-        else
-        {
-        switch (static_cast<Type::type>(parquetType))
-        {
+        switch (static_cast<Type::type>(parquetType)) {
+            case Type::type::DECIMAL:
+                pWriter->precisions.push_back(precision);
+                pWriter->scales.push_back(scale);
+                writeParquetType = decimal128(precision, scale);
+                break;
             case Type::type::BOOL:
                 writeParquetType = arrow::boolean();
                 break;
@@ -142,17 +126,23 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn
                 writeParquetType = arrow::utf8();
                 break;
             default:
-                throw std::invalid_argument("Unsupported parquet type");
+                throw std::invalid_argument("Unsupported parquet type: " + std::to_string(parquetType));
         }
-        }
         auto t = field(cFieldName, writeParquetType, nullable);
         fieldVector.emplace_back(t);
+        env->ReleaseIntArrayElements(decimalParamArray, decimalParamArrayPtr, JNI_ABORT);
         env->ReleaseStringUTFChars(fieldName, cFieldName);
     }
     auto t = std::make_unique<arrow::Schema>(fieldVector);
+    if (pWriter == nullptr) {
+        env->ThrowNew(runtimeExceptionClass, "the pWriter is null");
+    }
     pWriter->schema_ = std::make_shared<arrow::Schema>(fieldVector);
-
-    JNI_FUNC_END_VOID(runtimeExceptionClass)
+    ParquetWriter *pWriterNew = pWriter.release();
+    env->ReleaseIntArrayElements(fieldTypes, fieldTypesPtr, JNI_ABORT);
+    env->ReleaseBooleanArrayElements(nullables, nullablesPtr, JNI_ABORT);
+    return (jlong)(pWriterNew);
+    JNI_FUNC_END(runtimeExceptionClass)
 }
JNIEXPORT void JNICALL @@ -166,8 +156,13 @@ Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( auto colNums = env->GetArrayLength(vecNativeId); auto omniTypesPtr = env->GetIntArrayElements(omniTypes, JNI_FALSE); auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE); + if (pWriter == nullptr) { + env->ThrowNew(runtimeExceptionClass, "the pWriter is null"); + } pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr); - + env->ReleaseLongArrayElements(vecNativeId, vecNativeIdPtr, 0); + env->ReleaseIntArrayElements(omniTypes, omniTypesPtr, 0); + env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0); JNI_FUNC_END_VOID(runtimeExceptionClass) } @@ -182,6 +177,9 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn auto dataColumnsIdsPtr = env->GetBooleanArrayElements(dataColumnsIds, JNI_FALSE); auto writeRows = endPos - startPos; ParquetWriter *pWriter = (ParquetWriter *)writer; + if (pWriter == nullptr) { + env->ThrowNew(runtimeExceptionClass, "the pWriter is null"); + } pWriter->write(vecNativeIdPtr, colNums, omniTypesPtr, dataColumnsIdsPtr, true, startPos, endPos); env->ReleaseLongArrayElements(vecNativeId, vecNativeIdPtr, 0); @@ -189,3 +187,17 @@ JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJn env->ReleaseBooleanArrayElements(dataColumnsIds, dataColumnsIdsPtr, 0); JNI_FUNC_END_VOID(runtimeExceptionClass) } + +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_close(JNIEnv *env, jobject jObj, + jlong writer) +{ + JNI_FUNC_START + + ParquetWriter *pWriter = (ParquetWriter *)writer; + if (pWriter == nullptr) { + env->ThrowNew(runtimeExceptionClass, "delete nullptr error for writer"); + } + + delete pWriter; + JNI_FUNC_END_VOID(runtimeExceptionClass) +} diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h index a281f0371..8139d51e8 100644 --- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h +++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h @@ -37,20 +37,48 @@ extern "C" { #endif - JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter - (JNIEnv *env, jobject jObj, jobject job); +/* + * Class: com_huawei_boostkit_writer_jni_ParquetColumnarBatchJniWriter + * Method: initializeWriter + * Signature: + */ +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter + (JNIEnv *env, jobject jObj, jobject job, jlong writer); + +/* + * Class: com_huawei_boostkit_writer_jni_ParquetColumnarBatchJniWriter + * Method: initializeSchema + * Signature: + */ +JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema + (JNIEnv *env, jobject jObj, jlong writer, jobjectArray filedNames, jintArray fieldTypes, + jbooleanArray nullables, jobjectArray decimalParam); - JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema - (JNIEnv *env, jobject jObj, long writer, jobjectArray filedNames, jintArray fieldTypes, - jbooleanArray nullables, jobjectArray decimalParam); +/* + * Class: com_huawei_boostkit_writer_jni_ParquetColumnarBatchJniWriter + * Method: write + * Signature: + */ +JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write( + 
diff --git a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
index a281f0371..8139d51e8 100644
--- a/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
+++ b/omnioperator/omniop-native-reader/cpp/src/jni/ParquetColumnarBatchJniWriter.h
@@ -37,20 +37,48 @@
 extern "C" {
 #endif

-    JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter
-        (JNIEnv *env, jobject jObj, jobject job);
+/*
+ * Class:  com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter
+ * Method: initializeWriter
+ */
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeWriter
+    (JNIEnv *env, jobject jObj, jobject job, jlong writer);
+
+/*
+ * Class:  com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter
+ * Method: initializeSchema
+ */
+JNIEXPORT jlong JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema
+    (JNIEnv *env, jobject jObj, jlong writer, jobjectArray fieldNames, jintArray fieldTypes,
+    jbooleanArray nullables, jobjectArray decimalParam);

-    JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_initializeSchema
-        (JNIEnv *env, jobject jObj, long writer, jobjectArray filedNames, jintArray fieldTypes,
-        jbooleanArray nullables, jobjectArray decimalParam);
+/*
+ * Class:  com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter
+ * Method: write
+ */
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write(
+    JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId,
+    jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows);

-    JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_write(
-        JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId,
-        jintArray omniTypes, jbooleanArray dataColumnsIds, jint numRows);
+/*
+ * Class:  com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter
+ * Method: splitWrite
+ */
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite(
+    JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes,
+    jbooleanArray dataColumnsIds, jlong startPos, jlong endPos);

-    JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_splitWrite(
-        JNIEnv *env, jobject jObj, jlong writer, jlongArray vecNativeId, jintArray omniTypes,
-        jbooleanArray dataColumnsIds, jlong startPos, jlong endPos);
+/*
+ * Class:  com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter
+ * Method: close
+ */
+JNIEXPORT void JNICALL Java_com_huawei_boostkit_write_jni_ParquetColumnarBatchJniWriter_close(JNIEnv *env, jobject jObj,
+    jlong writer);

 #ifdef __cplusplus
 }
diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp
index fe559c62b..05db6d24b 100644
--- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp
+++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.cpp
@@ -36,11 +36,15 @@
 #include "parquet/arrow/reader.h"
 #include "parquet/exception.h"
 #include "parquet/properties.h"
+#include <filesystem>
 #include
 #include
 #include
 #include
 #include
+#include <iostream>
+#include <stdexcept>
+#include <string>

 using namespace arrow;
 using namespace arrow::internal;
@@ -55,79 +59,78 @@ namespace omniruntime::writer

     arrow::Status ParquetWriter::InitRecordWriter(UriInfo &uri, std::string &ugi)
     {
-
-        // Configure writer settings
         parquet::WriterProperties::Builder writer_properties;
-
-        // Configure Arrow-specific reader settings
         parquet::ArrowWriterProperties::Builder arrow_writer_properties;
-        std::shared_ptr<arrow::io::OutputStream> outputStream;
-
-        // Get the file from filesystem
         arrow::Status result;
         mutex_.lock();
         Filesystem *fs = GetFileSystemPtr(uri, ugi, result);
         mutex_.unlock();
-        if (fs == nullptr || fs->filesys_ptr == nullptr)
-        {
+        if (fs == nullptr || fs->filesys_ptr == nullptr) {
             return arrow::Status::IOError(result);
         }
-        std::string path = uri.ToString();
+        std::filesystem::path path(std::string(uri.Path()));
+        std::error_code ec;
+        // create_directories returns false when the directory already exists,
+        // which is not an error; only a set error_code signals failure.
+        std::filesystem::create_directories(path.parent_path(), ec);
+        if (ec) {
+            throw std::runtime_error("Create local directories fail: " + ec.message());
+        }
+        std::shared_ptr<arrow::io::OutputStream> outputStream;
         ARROW_ASSIGN_OR_RAISE(outputStream, fs->filesys_ptr->OpenOutputStream(path.string()));
         writer_properties.disable_dictionary();
-
-        // Temporarily use the default value of WriterProperties and
-        auto fileWriter = FileWriter::Open(
+        auto fileWriterResult = FileWriter::Open(
             *schema_, arrow::default_memory_pool(), outputStream,
             writer_properties.build(), parquet::default_arrow_writer_properties());
-
-        ARROW_ASSIGN_OR_RAISE(arrow_writer, fileWriter);
-
-        // ARROW_RETURN_NOT_OK(fileWriter.ValueOrDie()->Close());
+        if (!fileWriterResult.ok()) {
+            std::cerr << "Error opening file writer: " << fileWriterResult.status().ToString() << std::endl;
+            return fileWriterResult.status();
+        }
+        arrow_writer = std::move(fileWriterResult).ValueOrDie();
+        return arrow::Status::OK();
     }
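The directory-creation check above is worth spelling out, since std::filesystem::create_directories returns false both on failure and when the directories already exist; only the std::error_code (or a thrown filesystem_error) distinguishes the two. A self-contained sketch of the behavior assumed here, with an illustrative helper name:

#include <filesystem>
#include <stdexcept>
#include <string>

// Create the parent directory of filePath if needed; an existing directory is not an error.
static void EnsureParentDirectory(const std::string &filePath)
{
    namespace fs = std::filesystem;
    const fs::path parent = fs::path(filePath).parent_path();
    if (parent.empty()) {
        return; // bare file name, nothing to create
    }
    std::error_code ec;
    fs::create_directories(parent, ec); // returns false with ec unset if parent already exists
    if (ec) {
        throw std::runtime_error("Create local directories fail: " + ec.message());
    }
}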
     std::shared_ptr<::arrow::ChunkedArray> buildBooleanChunk(DataTypeId typeId, BaseVector *baseVector,
                                                              bool isSplitWrite = false, long startPos = 0,
                                                              long endPos = 0)
     {
         using T = typename NativeType<OMNI_BOOLEAN>::type;
         auto vector = (Vector<T> *)baseVector;
-        if (!isSplitWrite)
-        {
+        if (!isSplitWrite) {
             startPos = 0;
             endPos = vector->GetSize();
         }
-        long vectorSize = endPos - startPos;
+        int64_t vectorSize = endPos - startPos;
         bool values[vectorSize];
-        long index = 0;
+        int64_t index = 0;
         auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
         arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
         bitmap.SetBitsTo(true);
-        // todo why use vector-> Hashnull before setbitto will make other colums unnormal
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
+        // Only a split write needs a per-row copy into values[]; a full-vector
+        // write hands the backing buffer to Arrow directly, and the null loop
+        // touches nothing but the validity bitmap.
+        if (vector->HasNull()) {
+            for (long j = startPos; j < endPos; j++) {
+                if (vector->IsNull(j)) {
+                    bitmap.SetBitTo(index, false);
+                } else if (isSplitWrite) {
+                    values[index] = vector->GetValue(j);
+                }
+                index++;
             }
-            index++;
+        } else if (isSplitWrite) {
+            for (long j = startPos; j < endPos; j++) {
+                values[index] = vector->GetValue(j);
+                index++;
+            }
         }

-        TypedBufferBuilder<bool> builder;
+        TypedBufferBuilder<bool> builder;
         builder.Resize(vectorSize);
-        builder.Append(reinterpret_cast<const uint8_t *>(values), vectorSize);
+        builder.Append(reinterpret_cast<const uint8_t *>(isSplitWrite ? values : vector->GetValues()), vectorSize);
         auto maybe_buffer = builder.Finish();
         std::shared_ptr<Buffer> databuffer = *maybe_buffer;
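All of the chunk builders in this file share one assembly pattern: allocate a validity bitmap, fill a value buffer, wrap both in an ArrayData, and expose it as a single-chunk ChunkedArray. A self-contained sketch with hard-coded values standing in for OmniRuntime vector data, assuming an Arrow release (>= 7.0) where the bit helpers live in arrow::bit_util:

#include <arrow/api.h>
#include <arrow/util/bit_util.h>
#include <cstdint>
#include <memory>
#include <vector>

arrow::Result<std::shared_ptr<arrow::ChunkedArray>> MakeInt32Chunk()
{
    const std::vector<int32_t> values = {1, 2, 0, 4};
    const std::vector<bool> valid = {true, true, false, true}; // slot 2 is null

    // AllocateBitmap returns uninitialized bits, so mark everything valid first.
    ARROW_ASSIGN_OR_RAISE(auto bitmap, arrow::AllocateBitmap(values.size()));
    arrow::bit_util::SetBitsTo(bitmap->mutable_data(), 0, values.size(), true);
    for (size_t i = 0; i < valid.size(); ++i) {
        if (!valid[i]) {
            arrow::bit_util::SetBitTo(bitmap->mutable_data(), i, false);
        }
    }

    arrow::TypedBufferBuilder<int32_t> builder;
    ARROW_RETURN_NOT_OK(builder.Append(values.data(), values.size()));
    ARROW_ASSIGN_OR_RAISE(auto data, builder.Finish());

    auto type = arrow::int32();
    auto arrayData = arrow::ArrayData::Make(type, values.size(), {bitmap, data});
    auto array = std::make_shared<arrow::Int32Array>(arrayData);
    return arrow::ChunkedArray::Make({array}, type);
}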
@@ -145,361 +148,89 @@ namespace omniruntime::writer
         return arrow::ChunkedArray::Make(arrayVector, booleanType).ValueOrDie();
     }

-    std::shared_ptr<::arrow::ChunkedArray> buildInt16Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                           bool isSplitWrite = false, long startPos = 0,
-                                                           long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_SHORT>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        short values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<int16_t> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto int16Type = std::make_shared<Int16Type>();
-        auto arrayData = arrow::ArrayData::Make(int16Type, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto numericArray = std::make_shared<NumericArray<Int16Type>>(arrayData);
-        arrayVector.emplace_back(numericArray);
-
-        return ChunkedArray::Make(arrayVector, int16Type).ValueOrDie();
-    }
-
-    std::shared_ptr<::arrow::ChunkedArray> buildInt32Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                           bool isSplitWrite = false, long startPos = 0,
-                                                           long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_INT>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        int values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<int32_t> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto int32Type = std::make_shared<Int32Type>();
-        auto arrayData = arrow::ArrayData::Make(int32Type, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto numericArray = std::make_shared<NumericArray<Int32Type>>(arrayData);
-        arrayVector.emplace_back(numericArray);
-
-        return ChunkedArray::Make(arrayVector, int32Type).ValueOrDie();
-    }
-
-    std::shared_ptr<::arrow::ChunkedArray> buildInt64Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                           bool isSplitWrite = false, long startPos = 0,
-                                                           long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_LONG>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        int64_t values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        // todo why use vector-> Hashll before setbitto will make other colums unormal
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<int64_t> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto int64Type = std::make_shared<Int64Type>();
-        auto arrayData = arrow::ArrayData::Make(int64Type, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto numericArray = std::make_shared<NumericArray<Int64Type>>(arrayData);
-        arrayVector.emplace_back(numericArray);
-
-        return ChunkedArray::Make(arrayVector, int64Type).ValueOrDie();
-    }
-
-    std::shared_ptr<ChunkedArray> buildDate32Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                   bool isSplitWrite = false, long startPos = 0,
-                                                   long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_DATE32>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        int values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<int32_t> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto date32Type = std::make_shared<Date32Type>();
-        auto arrayData = arrow::ArrayData::Make(date32Type, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto date32Array = std::make_shared<NumericArray<Date32Type>>(arrayData);
-        arrayVector.emplace_back(date32Array);
-
-        return ChunkedArray::Make(arrayVector, date32Type).ValueOrDie();
-    }
-    std::shared_ptr<ChunkedArray> buildDate64Chunk(DataTypeId typeId, BaseVector *baseVector,
-                                                   bool isSplitWrite = false, long startPos = 0,
-                                                   long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_DATE64>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        int64_t values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<int64_t> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto date64Type = std::make_shared<Date64Type>();
-        auto arrayData = arrow::ArrayData::Make(date64Type, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto date64Array = std::make_shared<NumericArray<Date64Type>>(arrayData);
-        arrayVector.emplace_back(date64Array);
-
-        return ChunkedArray::Make(arrayVector, date64Type).ValueOrDie();
-    }
-
-    std::shared_ptr<ChunkedArray> buildDoubleChunk(DataTypeId typeId, BaseVector *baseVector,
-                                                   bool isSplitWrite = false, long startPos = 0,
-                                                   long endPos = 0)
-    {
-        using T = typename NativeType<OMNI_DOUBLE>::type;
-        auto vector = (Vector<T> *)baseVector;
-
-        if (!isSplitWrite)
-        {
-            startPos = 0;
-            endPos = vector->GetSize();
-        }
-
-        long vectorSize = endPos - startPos;
-        double values[vectorSize];
-        long index = 0;
-
-        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
-        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
-        bitmap.SetBitsTo(true);
-
-        for (long j = startPos; j < endPos; j++)
-        {
-            values[index] = vector->GetValue(j);
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
-            }
-            index++;
-        }
-
-        TypedBufferBuilder<double> builder;
-        builder.Resize(vectorSize);
-        builder.Append(values, vectorSize);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
-        std::vector<std::shared_ptr<Buffer>> buffers;
-        buffers.emplace_back(bitmapBuffer);
-        buffers.emplace_back(dataBuffer);
-
-        auto doubleType = std::make_shared<DoubleType>();
-        auto arrayData = arrow::ArrayData::Make(doubleType, vectorSize, buffers);
-
-        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
-        auto doubleArray = std::make_shared<NumericArray<DoubleType>>(arrayData);
-        arrayVector.emplace_back(doubleArray);
-
-        return ChunkedArray::Make(arrayVector, doubleType).ValueOrDie();
-    }
+    template <DataTypeId typeId, typename ChunkType, typename ArrowType>
+    std::shared_ptr<::arrow::ChunkedArray> buildChunk(BaseVector *baseVector,
+                                                      bool isSplitWrite = false, long startPos = 0,
+                                                      long endPos = 0)
+    {
+        using T = typename NativeType<typeId>::type;
+        auto vector = (Vector<T> *)baseVector;
+        if (!isSplitWrite) {
+            startPos = 0;
+            endPos = vector->GetSize();
+        }
+        int64_t vectorSize = endPos - startPos;
+        ChunkType values[vectorSize];
+        int64_t index = 0;
+        auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
+        arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
+        bitmap.SetBitsTo(true);
+        if (vector->HasNull()) {
+            for (long j = startPos; j < endPos; j++) {
+                if (vector->IsNull(j)) {
+                    bitmap.SetBitTo(index, false);
+                } else if (isSplitWrite) {
+                    values[index] = vector->GetValue(j);
+                }
+                index++;
+            }
+        } else if (isSplitWrite) {
+            for (long j = startPos; j < endPos; j++) {
+                values[index] = vector->GetValue(j);
+                index++;
+            }
+        }
+        TypedBufferBuilder<ChunkType> builder;
+        builder.Resize(vectorSize);
+        builder.Append(isSplitWrite ? values : vector->GetValues(), vectorSize);
+        auto dataBuffer = *builder.Finish();
+        std::vector<std::shared_ptr<Buffer>> buffers;
+        buffers.emplace_back(bitmapBuffer);
+        buffers.emplace_back(dataBuffer);
+        auto arrowType = std::make_shared<ArrowType>();
+        auto arrayData = arrow::ArrayData::Make(arrowType, vectorSize, buffers);
+        std::vector<std::shared_ptr<arrow::Array>> arrayVector;
+        auto arrowArray = std::make_shared<NumericArray<ArrowType>>(arrayData);
+        arrayVector.emplace_back(arrowArray);
+        return ChunkedArray::Make(arrayVector, arrowType).ValueOrDie();
+    }
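A reduced sketch of what the buildChunk template factors out of the five deleted builders: one function stem instantiated per (value type, Arrow type) pair. Plain std::vector input stands in for OmniRuntime vectors here, and the all-valid case passes no validity buffer:

#include <arrow/api.h>
#include <memory>
#include <vector>

template <typename CType, typename ArrowType>
std::shared_ptr<arrow::ChunkedArray> BuildChunkSketch(const std::vector<CType> &values)
{
    arrow::TypedBufferBuilder<CType> builder;
    (void)builder.Append(values.data(), values.size());
    auto data = builder.Finish().ValueOrDie();
    auto type = std::make_shared<ArrowType>();
    // nullptr validity buffer plus explicit null_count 0 marks every slot valid.
    auto arrayData = arrow::ArrayData::Make(type, static_cast<int64_t>(values.size()), {nullptr, data}, 0);
    auto array = std::make_shared<arrow::NumericArray<ArrowType>>(arrayData);
    return arrow::ChunkedArray::Make({array}, type).ValueOrDie();
}

// Usage, mirroring the OMNI_SHORT / OMNI_INT / OMNI_LONG cases in write():
// auto c1 = BuildChunkSketch<int16_t, arrow::Int16Type>({1, 2, 3});
// auto c2 = BuildChunkSketch<int32_t, arrow::Int32Type>({4, 5, 6});
// auto c3 = BuildChunkSketch<int64_t, arrow::Int64Type>({7, 8, 9});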
     std::shared_ptr<ChunkedArray> buildVarcharChunk(DataTypeId typeId, BaseVector *baseVector,
                                                     bool isSplitWrite = false, long startPos = 0,
                                                     long endPos = 0)
     {
-        auto vector = dynamic_cast<Vector<LargeStringContainer<std::string_view>> *>(baseVector);
+        auto vector = static_cast<Vector<LargeStringContainer<std::string_view>> *>(baseVector);

-        if (!isSplitWrite)
-        {
+        if (!isSplitWrite) {
             startPos = 0;
             endPos = vector->GetSize();
         }

-        long vectorSize = endPos - startPos;
+        int64_t vectorSize = endPos - startPos;
         auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
         arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
         bitmap.SetBitsTo(true);

         TypedBufferBuilder<int32_t> offsetsBuilder;
         TypedBufferBuilder<uint8_t> valuesBuilder;
-        int32_t current_offset = 0;
+        int32_t currentOffset = 0;
         offsetsBuilder.Append(0);
         valuesBuilder.Resize(vectorSize);

-        long index = 0;
-        for (long j = startPos; j < endPos; j++)
-        {
-            if (vector->IsNull(j))
-            {
-                bitmap.SetBitTo(index, false);
+        int64_t index = 0;
+        for (long j = startPos; j < endPos; j++) {
+            if (vector->IsNull(j)) {
+                // Mark the slot invalid only; index advances exactly once per row below.
+                bitmap.SetBitTo(index, false);
             }
             index++;
             std::string strValue = std::string(vector->GetValue(j));
-            const char *cStr = strValue.c_str();
             size_t length = strValue.length();
-            char *charArray = new char[length + 1];
-            strcpy(charArray, cStr);
-            current_offset += length;
-            offsetsBuilder.Append(current_offset);
-            valuesBuilder.Append(charArray, length);
-            delete[] charArray;
+            currentOffset += length;
+            offsetsBuilder.Append(currentOffset);
+            valuesBuilder.Append(reinterpret_cast<const uint8_t *>(strValue.data()), length);
         }

         auto offsetsBuffer = offsetsBuilder.Finish().ValueOrDie();
@@ -521,155 +252,130 @@ namespace omniruntime::writer
         return ChunkedArray::Make(arrayVector, utf8Type).ValueOrDie();
     }

-    // arrow parquet don't have Decimal64 Type ,use Decimal128 instead of it
+    // Arrow Parquet has no Decimal64 type; widen to Decimal128 instead.
     std::shared_ptr<ChunkedArray> buildDecimal64Chunk(DataTypeId typeId, BaseVector *baseVector, int precision,
                                                       int scale, bool isSplitWrite = false,
                                                       long startPos = 0, long endPos = 0)
     {
         using T = typename NativeType<OMNI_DECIMAL64>::type;
         auto vector = (Vector<T> *)baseVector;
-        if (!isSplitWrite)
-        {
+        if (!isSplitWrite) {
             startPos = 0;
             endPos = vector->GetSize();
         }
-
-        long vectorSize = endPos - startPos;
+        int64_t vectorSize = endPos - startPos;
         auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
         arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
         bitmap.SetBitsTo(true);
         BufferBuilder builder;
         builder.Resize(vectorSize);
         std::vector<BasicDecimal128> decimalArray;
-        long index = 0;
-        for (long j = startPos; j < endPos; j++)
-        {
-
-            BasicDecimal128 basicDecimal128(0, vector->GetValue(j));
-            decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
-            if (vector->IsNull(j))
-            {
+        int64_t index = 0;
+        for (long j = startPos; j < endPos; j++) {
+            // The single-argument constructor sign-extends negative unscaled values.
+            decimalArray.emplace_back(BasicDecimal128(vector->GetValue(j)));
+            if (vector->IsNull(j)) {
                 bitmap.SetBitTo(index, false);
             }
             index++;
         }
-        builder.Append(decimalArray.data(), decimalArray.size() * 16);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
+        builder.Append(decimalArray.data(), decimalArray.size() * sizeof(arrow::Decimal128));
+        auto dataBuffer = *builder.Finish();
         std::vector<std::shared_ptr<Buffer>> buffers;
         buffers.emplace_back(bitmapBuffer);
         buffers.emplace_back(dataBuffer);

         auto decimal128Type = std::make_shared<Decimal128Type>(precision, scale);
         auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);
         std::vector<std::shared_ptr<arrow::Array>> arrayVector;
         auto decimal128Array = std::make_shared<Decimal128Array>(arrayData);
         arrayVector.emplace_back(decimal128Array);
         return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
     }
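One subtlety in the decimal64 path: widening the 64-bit unscaled value into a Decimal128 must sign-extend. BasicDecimal128's single-argument integer constructor does this, while the (high, low) pair constructor with a zero high word would corrupt negative values. A minimal check, assuming only Arrow's decimal utilities:

#include <arrow/util/decimal.h>
#include <cassert>
#include <cstdint>

int main()
{
    const int64_t unscaled = -12345; // e.g. -123.45 at scale 2
    arrow::BasicDecimal128 widened(unscaled);                             // sign-extended
    arrow::BasicDecimal128 truncated(0, static_cast<uint64_t>(unscaled)); // high word left zero
    assert(widened.high_bits() == -1 && truncated.high_bits() == 0);
    assert(widened != truncated);
    return 0;
}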
     std::shared_ptr<ChunkedArray> buildDecimal128Chunk(DataTypeId typeId, BaseVector *baseVector, int precision,
                                                        int scale, bool isSplitWrite = false,
                                                        long startPos = 0, long endPos = 0)
     {
         using T = typename NativeType<OMNI_DECIMAL128>::type;
         auto vector = (Vector<T> *)baseVector;
-        if (!isSplitWrite)
-        {
+        if (!isSplitWrite) {
             startPos = 0;
             endPos = vector->GetSize();
         }
-
-        long vectorSize = endPos - startPos;
+        int64_t vectorSize = endPos - startPos;
         auto bitmapBuffer = AllocateBitmap(vectorSize).ValueOrDie();
         arrow::internal::Bitmap bitmap(bitmapBuffer, 0, vectorSize);
         bitmap.SetBitsTo(true);
         BufferBuilder builder;
         builder.Resize(vectorSize);
         std::vector<BasicDecimal128> decimalArray;
-        long index = 0;
-        for (long j = startPos; j < endPos; j++)
-        {
+        int64_t index = 0;
+        for (long j = startPos; j < endPos; j++) {
             auto decimalValue = vector->GetValue(j);
-            BasicDecimal128 basicDecimal128(vector->GetValue(j).HighBits(), vector->GetValue(j).LowBits());
+            BasicDecimal128 basicDecimal128(decimalValue.HighBits(), decimalValue.LowBits());
             decimalArray.emplace_back(BasicDecimal128(basicDecimal128));
-
-            decimalArray.back().Rescale(2, 2);
-            if (vector->IsNull(j))
-            {
+            if (vector->IsNull(j)) {
                 bitmap.SetBitTo(index, false);
             }
             index++;
         }
-        builder.Append(decimalArray.data(), decimalArray.size() * 16);
-        auto maybe_buffer = builder.Finish();
-        std::shared_ptr<Buffer> dataBuffer = *maybe_buffer;
-
+        builder.Append(decimalArray.data(), decimalArray.size() * sizeof(arrow::Decimal128));
+        auto dataBuffer = *builder.Finish();
         std::vector<std::shared_ptr<Buffer>> buffers;
         buffers.emplace_back(bitmapBuffer);
         buffers.emplace_back(dataBuffer);

         auto decimal128Type = std::make_shared<Decimal128Type>(precision, scale);
         auto arrayData = arrow::ArrayData::Make(decimal128Type, vectorSize, buffers);
         std::vector<std::shared_ptr<arrow::Array>> arrayVector;
         auto decimal128Array = std::make_shared<Decimal128Array>(arrayData);
         arrayVector.emplace_back(decimal128Array);
         return ChunkedArray::Make(arrayVector, decimal128Type).ValueOrDie();
     }
     void ParquetWriter::write(long *vecNativeId, int colNums, const int *omniTypes, const unsigned char *dataColumnsIds,
                               bool isSplitWrite, long startPos, long endPos)
     {
         std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;
         int decimalIndex = 0;
         int precision = 0;
         int scale = 0;
-        for (int i = 0; i < colNums; ++i)
-        {
-            if (!dataColumnsIds[i])
-            {
+        for (int i = 0; i < colNums; ++i) {
+            if (!dataColumnsIds[i]) {
                 continue;
             }
             auto vec = (BaseVector *)vecNativeId[i];
             auto typeId = static_cast<DataTypeId>(omniTypes[i]);
-            switch (typeId)
-            {
+            switch (typeId) {
                 case OMNI_BOOLEAN:
                     chunks.emplace_back(buildBooleanChunk(typeId, vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_SHORT:
-                    chunks.emplace_back(buildInt16Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_SHORT, int16_t, Int16Type>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_INT:
-                    chunks.emplace_back(buildInt32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_INT, int32_t, Int32Type>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_LONG:
-                    chunks.emplace_back(buildInt64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_LONG, int64_t, Int64Type>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_DATE32:
-                    chunks.emplace_back(buildDate32Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_DATE32, int32_t, Date32Type>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_DATE64:
-                    chunks.emplace_back(buildDate64Chunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_DATE64, int64_t, Date64Type>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_DOUBLE:
-                    chunks.emplace_back(buildDoubleChunk(typeId, vec, isSplitWrite, startPos, endPos));
+                    chunks.emplace_back(buildChunk<OMNI_DOUBLE, double, DoubleType>(vec, isSplitWrite, startPos, endPos));
                     break;
                 case OMNI_VARCHAR:
                     chunks.emplace_back(buildVarcharChunk(typeId, vec, isSplitWrite, startPos, endPos));
@@ -688,12 +394,15 @@ namespace omniruntime::writer
                     break;
                 default:
                     throw std::runtime_error(
-                        "Native columnar write not support for this type: " + typeId);
+                        "Native columnar write does not support this type: " + std::to_string(typeId));
             }
         }

-        auto numRows = (chunks.empty() ? 0 : chunks[0]->length());
+        auto numRows = chunks.empty() ? 0 : chunks[0]->length();
         auto table = arrow::Table::Make(schema_, std::move(chunks), numRows);
+        if (!arrow_writer) {
+            throw std::runtime_error("Arrow writer is not initialized");
+        }
         PARQUET_THROW_NOT_OK(arrow_writer->WriteTable(*table));
         PARQUET_THROW_NOT_OK(arrow_writer->Close());
     }
diff --git a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
index 33631f4a8..59043cfbc 100644
--- a/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
+++ b/omnioperator/omniop-native-reader/cpp/src/parquet/ParquetWriter.h
@@ -20,7 +20,7 @@
 #ifndef NATIVE_READER_PARQUETWRITER_H
 #define NATIVE_READER_PARQUETWRITER_H
-#endif // NATIVE_READER_PARQUETWRITER_H
+

 #include
 #include
@@ -48,4 +48,5 @@ namespace omniruntime::writer
     std::vector<int32_t> precisions;
     std::vector<int32_t> scales;
 };
-}
\ No newline at end of file
+}
+#endif // NATIVE_READER_PARQUETWRITER_H
\ No newline at end of file
diff --git a/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java b/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java
index 704cac09d..aa94fc62a 100644
--- a/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java
+++ b/omnioperator/omniop-native-reader/java/src/main/java/com/huawei/boostkit/write/jni/ParquetColumnarBatchJniWriter.java
@@ -27,11 +27,13 @@ public class ParquetColumnarBatchJniWriter {
         NativeReaderLoader.getInstance();
     }

-    public native long initializeWriter(JSONObject var1);
+    public native void initializeWriter(JSONObject var1, long writer);

-    public native void initializeSchema(long writer, String[] fieldNames, int[] fieldTypes, boolean[] nullables, int[][]decimalParam);
+    public native long initializeSchema(long writer, String[] fieldNames, int[] fieldTypes, boolean[] nullables, int[][] decimalParam);

     public native void write(long writer, long[] vecNativeId, int[] omniTypes, boolean[] dataColumnsIds, int rowNums);

     public native void splitWrite(long writer, long[] vecNativeId, int[] omniTypes, boolean[] dataColumnsIds, long startPos, long endPos);
+
+    public native void close(long writer);
 }
\ No newline at end of file
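A note on lifecycle: write() above both writes the table and closes the Arrow FileWriter, which only works if each output file receives exactly one write() call. If a file may receive several batches, the close belongs in its own step, roughly as sketched here against the parquet::arrow::FileWriter interface (an Arrow release where WriteTable's chunk_size argument has a default is assumed):

#include <arrow/api.h>
#include <arrow/status.h>
#include <parquet/arrow/writer.h>
#include <memory>

// Illustrative wrapper, not part of the patch: keep the file open across batches.
class BatchedParquetWriter {
public:
    explicit BatchedParquetWriter(std::unique_ptr<parquet::arrow::FileWriter> writer)
        : writer_(std::move(writer)) {}

    arrow::Status WriteBatch(const arrow::Table &table)
    {
        return writer_->WriteTable(table); // file stays open between batches
    }

    arrow::Status Close()
    {
        return writer_->Close(); // finalize the footer exactly once
    }

private:
    std::unique_ptr<parquet::arrow::FileWriter> writer_;
};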
diff --git a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
index 4a299a2b0..5e85c7e70 100644
--- a/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
+++ b/omnioperator/omniop-spark-extension/java/src/main/java/com/huawei/boostkit/spark/jni/ParquetColumnarBatchWriter.java
@@ -50,6 +50,12 @@ import java.io.IOException;
 import java.net.URI;

 public class ParquetColumnarBatchWriter {
+    public long writer;
+
+    public long schema;
+
+    public ParquetColumnarBatchJniWriter jniWriter;
+
     public ParquetColumnarBatchWriter() {
         jniWriter = new ParquetColumnarBatchJniWriter();
     }
@@ -195,7 +201,8 @@ public class ParquetColumnarBatchWriter {
         writerOptionsJson.put("port", uri.getPort());
         writerOptionsJson.put("path", uri.getPath() == null ? "" : uri.getPath());

-        writer = jniWriter.initializeWriter(writerOptionsJson);
+        jniWriter.initializeWriter(writerOptionsJson, writer);
     }

     public void convertGreGorianToJulian(IntVec intVec, int startPos, int endPos) {
@@ -211,15 +218,13 @@ public class ParquetColumnarBatchWriter {
         String[] fieldNames = new String[schemaLength];
         int[] fieldTypes = new int[schemaLength];
         boolean[] nullables = new boolean[schemaLength];
-        String[] metaDataKeys = new String[schemaLength];
-        String[] metaDataValues = new String[schemaLength];
         for (int i = 0; i < schemaLength; i++) {
             StructField field = dataSchema.fields()[i];
             fieldNames[i] = field.name();
             fieldTypes[i] = sparkTypeToParquetLibType(field.dataType());
             nullables[i] = field.nullable();
         }
-        jniWriter.initializeSchema(writer, fieldNames, fieldTypes, nullables, extractDecimalParam(dataSchema));
+        writer = jniWriter.initializeSchema(writer, fieldNames, fieldTypes, nullables, extractDecimalParam(dataSchema));
     }

     public int sparkTypeToParquetLibType(DataType dataType) {
@@ -228,20 +233,20 @@ public class ParquetColumnarBatchWriter {
         } else if (dataType instanceof ShortType) {
             return ParquetLibTypeKind.INT16.ordinal();
         } else if (dataType instanceof IntegerType) {
-            IntegerType integerType = (IntegerType) dataType;
-            switch (integerType.defaultSize()) {
-                case 1:
-                    return ParquetLibTypeKind.INT8.ordinal();
-                case 2:
-                    return ParquetLibTypeKind.INT16.ordinal();
-                case 4:
-                    return ParquetLibTypeKind.INT32.ordinal();
-                case 8:
-                    return ParquetLibTypeKind.DATE64.ordinal();
-                default:
-                    throw new RuntimeException(
-                        "UnSupport size " + integerType.defaultSize() + " of integer type");
-            }
+            // Spark IntegerType is always 4 bytes; map it straight to INT32.
+            return ParquetLibTypeKind.INT32.ordinal();
         } else if (dataType instanceof LongType) {
             return ParquetLibTypeKind.INT64.ordinal();
         } else if (dataType instanceof DateType) {
@@ -297,7 +302,6 @@ public class ParquetColumnarBatchWriter {

     public void write(int[] omniTypes, boolean[] dataColumnsIds, ColumnarBatch batch) {
-        JSONObject job = new JSONObject();
         long[] vecNativeIds = new long[batch.numCols()];

         for (int i = 0; i < batch.numCols(); i++) {
@@ -328,8 +332,8 @@ public class ParquetColumnarBatchWriter {
         jniWriter.splitWrite(writer, vecNativeIds, omniTypes, dataColumnsIds, startPos, endPos);
     }

-    public long writer;
+    public void close() {
+        jniWriter.close(writer);
+    }

-    public long schema;
-    public ParquetColumnarBatchJniWriter jniWriter;
 }
diff --git a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala
index c5d9fa06e..5c58664ea 100644
---
 a/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala
+++ b/omnioperator/omniop-spark-extension/java/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ OmniParquetOutputWriter.scala
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2024-2024. Huawei Technologies Co., Ltd. All rights reserved.
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -28,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.execution.datasources.{OmniInternalRow, OutputWriter}
 import org.apache.spark.sql.types.StructType

-import java.net.URI
+import scala.Array.{emptyBooleanArray, emptyIntArray}

 // NOTE: This class is instantiated and used on executor side only, no need to be serializable.
 class OmniParquetOutputWriter(path: String, dataSchema: StructType,
@@ -36,23 +37,31 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType,
   extends OutputWriter {

   val writer = new ParquetColumnarBatchWriter()
-  var omniTypes: Array[Int] = new Array[Int](0)
-  var dataColumnsIds: Array[Boolean] = new Array[Boolean](0)
-  var allOmniTypes: Array[Int] = new Array[Int](0)
+  var omniTypes: Array[Int] = emptyIntArray
+  var dataColumnsIds: Array[Boolean] = emptyBooleanArray
+  var allOmniTypes: Array[Int] = emptyIntArray

   def initialize(allColumns: Seq[Attribute], dataColumns: Seq[Attribute]): Unit = {
     val filePath = new Path(path)
-    val ugi = UserGroupInformation.getCurrentUser.toString
     writer.initializeSchemaJava(dataSchema)
     writer.initializeWriterJava(filePath)
-    dataSchema.foreach(field => {
-      omniTypes = omniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata).getId.ordinal()
-    })
+    omniTypes = dataSchema.fields
+      .map(field => sparkTypeToOmniType(field.dataType, field.metadata).getId.ordinal())

-    allColumns.toStructType.foreach(field => {
-      allOmniTypes = allOmniTypes :+ sparkTypeToOmniType(field.dataType, field.metadata)
-        .getId.ordinal()
-    })
+    allOmniTypes = allColumns.toStructType.fields
+      .map(field => sparkTypeToOmniType(field.dataType, field.metadata).getId.ordinal())

     dataColumnsIds = allColumns.map(x => dataColumns.contains(x)).toArray
   }
@@ -68,6 +77,7 @@ class OmniParquetOutputWriter(path: String, dataSchema: StructType,
   }

   override def close(): Unit = {
+    writer.close()
   }

   override def path(): String = {
diff --git a/omnioperator/omniop-spark-extension/java/src/test/scala/com/huawei/boostkit/spark/TableWriteBasicFunctionSuite.scala b/omnioperator/omniop-spark-extension/java/src/test/scala/com/huawei/boostkit/spark/TableWriteBasicFunctionSuite.scala
index b32c3983d..c8888db59 100644
--- a/omnioperator/omniop-spark-extension/java/src/test/scala/com/huawei/boostkit/spark/TableWriteBasicFunctionSuite.scala
+++ b/omnioperator/omniop-spark-extension/java/src/test/scala/com/huawei/boostkit/spark/TableWriteBasicFunctionSuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ColumnarBroa
 import org.apache.spark.sql.execution.{ColumnarBroadcastExchangeExec, ColumnarFilterExec, ColumnarProjectExec, ColumnarTakeOrderedAndProjectExec, CommandResultExec, LeafExecNode, OmniColumnarToRowExec, ProjectExec, RowToOmniColumnarExec, SparkPlan, TakeOrderedAndProjectExec, UnaryExecNode}
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.execution.ColumnarDataWritingCommandExec

 import scala.concurrent.Future

@@ -63,6 +64,24 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession {
     val runRows = select.collect()
     val expectedRows = Seq(Row("Lisa", "Sales", 10000, 35), Row("Maggie", "Sales", 1, 2))
     assert(QueryTest.sameRows(runRows, expectedRows).isEmpty, "the run value is error")
   }

+  test("Insert basic data parquet (Non-partitioned table)") {
+    val dropParquet = spark.sql("drop table if exists employees_for_parquet_table_write_ut_test")
+    dropParquet.collect()
+    val employeesParquet = Seq[(String, String, Int, Int)](
+      ("Lisa", "Sales", 10000, 35)
+    ).toDF("name", "dept", "salary", "age")
+    employeesParquet.write.format("parquet").saveAsTable("employees_for_parquet_table_write_ut_test")
+
+    val insertParquet = spark.sql("insert into " +
+      "employees_for_parquet_table_write_ut_test values('Maggie', 'Sales', 1, 2)")
+    insertParquet.collect()
+    val selectParquet = spark.sql("select * from employees_for_parquet_table_write_ut_test")
+    val runRowsParquet = selectParquet.collect()
+    val expectedRowsParquet = Seq(Row("Lisa", "Sales", 10000, 35), Row("Maggie", "Sales", 1, 2))
+    assert(QueryTest.sameRows(runRowsParquet, expectedRowsParquet).isEmpty, "the run value is error")
+  }
+
   test("Insert Basic data (Partitioned table)") {
@@ -81,6 +100,22 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession {
     assert(QueryTest.sameRows(runRows, expectedRows).isEmpty, "the run value is error")
   }

+  test("Insert Basic data parquet (Partitioned table)") {
+    val drop = spark.sql("drop table if exists employees_for_parquet_table_write_ut_partition_test")
+    drop.collect()
+    val employees = Seq(("Lisa", "Sales", 10000, 35)).toDF("name", "dept", "salary", "age")
+    employees.write.format("parquet").partitionBy("age")
+      .saveAsTable("employees_for_parquet_table_write_ut_partition_test")
+    val insert = spark.sql("insert into employees_for_parquet_table_write_ut_partition_test " +
+      "values('Maggie','Sales',200,30),('Bob','Sales',2000,30),('Tom','Sales',5000,20)")
+    insert.collect()
+    val select = spark.sql("select * from employees_for_parquet_table_write_ut_partition_test")
+    val runRows = select.collect()
+    val expectedRows = Seq(Row("Lisa", "Sales", 10000, 35), Row("Maggie", "Sales", 200, 30),
+      Row("Bob", "Sales", 2000, 30), Row("Tom", "Sales", 5000, 20))
+    assert(QueryTest.sameRows(runRows, expectedRows).isEmpty, "the run value is error")
+  }
+
   test("Unsupported Scenarios") {
     val data = Seq[(Int, Int)](
       (10000, 35),
@@ -91,10 +126,9 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession {
     insert.collect()
     var columnarDataWrite = insert.queryExecution.executedPlan.asInstanceOf[CommandResultExec]
       .commandPhysicalPlan.find({
-        case _: DataWritingCommandExec => true
+        case _: ColumnarDataWritingCommandExec => true
         case _ => false
-      }
-      )
+      })
     assert(columnarDataWrite.isDefined, "use columnar data writing command")
table_write_ut_map_test" + @@ -128,6 +162,22 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession { "529314109398732268.884038357697864858", "the run value is error") } + test("Insert of parquet decimal 128") { + val drop = spark.sql("drop table if exists table_parquet_for_decimal_128") + drop.collect() + val createTable = spark.sql("create table table_parquet_for_decimal_128 " + + "(amount DECIMAL(38,18)) using parquet") + createTable.collect() + + val insert = spark.sql("insert into table_parquet_for_decimal_128 " + + "values(529314109398732268.884038357697864858)") + insert.collect() + val select = spark.sql("select * from table_parquet_for_decimal_128") + val runRows = select.collect() + assert(runRows(0).getDecimal(0).toString == + "529314109398732268.884038357697864858", "the run value is error") + } + test("replace child plan to columnar") { val drop = spark.sql("drop table if exists test_parquet_int") drop.collect() @@ -148,8 +198,8 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession { val columnarFilter = insertNew.queryExecution.executedPlan.asInstanceOf[CommandResultExec] .commandPhysicalPlan.find({ - case _: ColumnarFilterExec => true - case _ => false + case _: ColumnarFilterExec => false + case _ => true } ) assert(columnarFilter.isDefined, "use columnar data writing command") @@ -169,6 +219,20 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession { "1001-01-04", "the run value is error") } + test("rebase parquet date to julian") { + val drop = spark.sql("drop table if exists test_parquet_date") + drop.collect() + val createTable = spark.sql("create table test_parquet_date(date_col date) using parquet") + createTable.collect() + val insert = spark.sql("insert into table test_parquet_date values(cast('1001-01-04' as date))") + insert.collect() + + val select = spark.sql("select * from test_parquet_date") + val runRows = select.collect() + assert(runRows(0).getDate(0).toString == + "1001-01-04", "the run value is error") + } + test("empty string partition") { val drop = spark.sql("drop table if exists table_insert_varchar") drop.collect() @@ -201,4 +265,37 @@ class TableWriteBasicFunctionSuite extends QueryTest with SharedSparkSession { Row(13, "6884578", 6, null, null)) assert(QueryTest.sameRows(runRowsNP, expectedRowsNP).isEmpty, "the run value is error") } + + test("empty parquet string partition") { + val drop = spark.sql("drop table if exists table_parquet_insert_varchar") + drop.collect() + val createTable = spark.sql("create table table_parquet_insert_varchar" + + "(id int, c_varchar varchar(40)) using parquet partitioned by (p_varchar varchar(40))") + createTable.collect() + val insert = spark.sql("insert into table table_parquet_insert_varchar values" + + "(5,'',''), (13,'6884578', null), (6,'72135', '666')") + insert.collect() + + val select = spark.sql("select * from table_parquet_insert_varchar order by id, c_varchar, p_varchar") + val runRows = select.collect() + val expectedRows = Seq(Row(5, "", null), Row(6, "72135", "666"), Row(13, "6884578", null)) + assert(QueryTest.sameRows(runRows, expectedRows).isEmpty, "the run value is error") + + val dropNP = spark.sql("drop table if exists table_parquet_insert_varchar_np") + dropNP.collect() + val createTableNP = spark.sql("create table table_parquet_insert_varchar_np" + + "(id int, c_varchar varchar(40)) using parquet partitioned by " + + "(p_varchar1 int, p_varchar2 varchar(40), p_varchar3 varchar(40))") + createTableNP.collect() + val 
+    val insertNP = spark.sql("insert into table table_parquet_insert_varchar_np values" +
+      "(5,'',1,'',''), (13,'6884578',6, null, null), (1,'abc',1,'',''), " +
+      "(3,'abcde',6,null,null), (4,'qqqqq', 8, 'a', 'b'), (6,'ooooo', 8, 'a', 'b')")
+    insertNP.collect()
+    val selectNP = spark.sql("select * from table_parquet_insert_varchar_np " +
+      "order by id, c_varchar, p_varchar1")
+    val runRowsNP = selectNP.collect()
+    val expectedRowsNP = Seq(Row(1, "abc", 1, null, null), Row(3, "abcde", 6, null, null),
+      Row(4, "qqqqq", 8, "a", "b"), Row(5, "", 1, null, null), Row(6, "ooooo", 8, "a", "b"),
+      Row(13, "6884578", 6, null, null))
+    assert(QueryTest.sameRows(runRowsNP, expectedRowsNP).isEmpty, "the run value is error")
+  }
 }
-- 
Gitee