From 24d025b4538f7354d2ced95d711264d90f3375cd Mon Sep 17 00:00:00 2001
From: luo_zihao5524
Date: Fri, 7 Jul 2023 15:52:39 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcopy=E8=AF=86=E5=88=ABgbk/gb1?=
 =?UTF-8?q?8030=E4=B8=AD=E6=96=87=E5=AD=97=E7=AC=A6=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix COPY misreading Chinese characters in GBK/GB18030 input.

CopyReadLineTextTemplate() only stepped over the trailing bytes of a
multibyte character when cstate->encoding_embeds_ascii was set.  Run
that block for PG_GBK and PG_GB18030 file encodings as well, and pass
the second byte of the character to pg_encoding_mblen() instead of
leaving mblen_str[1] untouched.

The second byte is fetched inside the high-bit branch, after an explicit
refill check, rather than on every iteration of the per-byte loop, so
input without high-bit bytes pays nothing for this fix.

Regression tests copy_gbk_test and copy_gb18030_test round-trip the
same rows through csv, text, fixed and binary COPY.
---
 src/gausskernel/optimizer/commands/copy.cpp   |  11 +-
 .../regress/input/copy_gb18030_test.source    |  37 ++++++
 src/test/regress/input/copy_gbk_test.source   |  36 ++++++
 .../regress/output/copy_gb18030_test.source   | 112 ++++++++++++++++++
 src/test/regress/output/copy_gbk_test.source  | 106 +++++++++++++++++
 src/test/regress/parallel_schedule0           |   2 +-
 src/test/regress/parallel_schedule0B          |   2 +-
 7 files changed, 302 insertions(+), 4 deletions(-)
 create mode 100644 src/test/regress/input/copy_gb18030_test.source
 create mode 100644 src/test/regress/input/copy_gbk_test.source
 create mode 100644 src/test/regress/output/copy_gb18030_test.source
 create mode 100644 src/test/regress/output/copy_gbk_test.source

diff --git a/src/gausskernel/optimizer/commands/copy.cpp b/src/gausskernel/optimizer/commands/copy.cpp
index 7f13439da2..74ba6209e4 100644
--- a/src/gausskernel/optimizer/commands/copy.cpp
+++ b/src/gausskernel/optimizer/commands/copy.cpp
@@ -7265,10 +7265,17 @@ static bool CopyReadLineTextTemplate(CopyState cstate)
          * high-bit set, so as an optimization we can avoid this block
          * entirely if it is not set.
          */
-        if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c)) {
+        if ((cstate->encoding_embeds_ascii || cstate->file_encoding == PG_GBK || cstate->file_encoding == PG_GB18030)
+            && IS_HIGHBIT_SET(c)) {
             int mblen;
 
+            /*
+             * The length routine is also handed the second byte of the
+             * character (GB18030 presumably needs it to tell 2-byte from
+             * 4-byte forms), so make sure that byte is loaded, or that we
+             * are at EOF, before asking for the length.
+             */
+            IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
             mblen_str[0] = c;
+            mblen_str[1] = (raw_buf_ptr < copy_buf_len) ? copy_raw_buf[raw_buf_ptr] : '\0';
-            /* All our encodings only read the first byte to get the length */
             mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
             IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
diff --git a/src/test/regress/input/copy_gb18030_test.source b/src/test/regress/input/copy_gb18030_test.source
new file mode 100644
index 0000000000..43763e2c3b
--- /dev/null
+++ b/src/test/regress/input/copy_gb18030_test.source
@@ -0,0 +1,37 @@
+CREATE DATABASE db_gb18030 TEMPLATE template0 encoding 'GB18030' lc_ctype 'zh_CN.GB18030' lc_collate 'zh_CN.GB18030';
+\c db_gb18030
+create table t (id int,c varchar);
+insert into t values (1,'测试');
+insert into t values (2,'测试玕');
+insert into t values (3,'测试3玕测试4');
+insert into t values (4,'玕测试4');
+insert into t values (5,'乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖');
+insert into t values (6,'㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓');
+
+create table t_stdin (id int,c varchar);
+create table t_csv (id int,c varchar);
+create table t_text (id int,c varchar);
+create table t_fixed (id int,c varchar);
+create table t_binary (id int,c varchar);
+
+COPY t TO '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv');
+COPY t TO '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text');
+COPY t TO '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300));
+COPY t TO '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary');
+
+COPY t_csv FROM '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv');
+COPY t_text FROM '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text');
+COPY t_fixed FROM '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300));
+COPY t_binary FROM '@abs_srcdir@/data/datanode1/t_binary.data'
WITH(FORMAT 'binary');
+
+SELECT * FROM t;
+SELECT * FROM t_csv;
+SELECT * FROM t_text;
+SELECT * FROM t_fixed;
+SELECT * FROM t_binary;
+
+\d t;
+\d t_csv;
+\d t_text;
+\d t_fixed;
+\d t_binary;
diff --git a/src/test/regress/input/copy_gbk_test.source b/src/test/regress/input/copy_gbk_test.source
new file mode 100644
index 0000000000..c1c314d7b7
--- /dev/null
+++ b/src/test/regress/input/copy_gbk_test.source
@@ -0,0 +1,36 @@
+CREATE DATABASE db_gbk_test TEMPLATE template0 encoding 'GBK' lc_ctype 'zh_CN.GBK' lc_collate 'zh_CN.GBK';
+\c db_gbk_test
+create table t (id int,c varchar);
+insert into t values (1,'测试');
+insert into t values (2,'测试玕');
+insert into t values (3,'测试3玕测试4');
+insert into t values (4,'玕测试4');
+insert into t values (5,'乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖');
+
+create table t_stdin (id int,c varchar);
+create table t_csv (id int,c varchar);
+create table t_text (id int,c varchar);
+create table t_fixed (id int,c varchar);
+create table t_binary (id int,c varchar);
+
+COPY t TO '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv');
+COPY t TO '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text');
+COPY t TO '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300));
+COPY t TO '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary');
+
+COPY t_csv FROM '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv');
+COPY t_text FROM '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text');
+COPY t_fixed FROM '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300));
+COPY t_binary FROM '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary');
+
+SELECT * FROM t;
+SELECT * FROM t_csv;
+SELECT * FROM t_text;
+SELECT * FROM t_fixed;
+SELECT * FROM t_binary;
+
+\d t;
+\d t_csv;
+\d t_text;
+\d t_fixed;
+\d t_binary;
diff --git a/src/test/regress/output/copy_gb18030_test.source
b/src/test/regress/output/copy_gb18030_test.source new file mode 100644 index 0000000000..74c3214409 --- /dev/null +++ b/src/test/regress/output/copy_gb18030_test.source @@ -0,0 +1,112 @@ +CREATE DATABASE db_gb18030 TEMPLATE template0 encoding 'GB18030' lc_ctype 'zh_CN.GB18030' lc_collate 'zh_CN.GB18030'; +\c db_gb18030 +create table t (id int,c varchar); +insert into t values (1,'测试'); +insert into t values (2,'测试玕'); +insert into t values (3,'测试3玕测试4'); +insert into t values (4,'玕测试4'); +insert into t values (5,'乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖'); +insert into t values (6,'㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓'); +create table t_stdin (id int,c varchar); +create table t_csv (id int,c varchar); +create table t_text (id int,c varchar); +create table t_fixed (id int,c varchar); +create table t_binary (id int,c varchar); +COPY t TO '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv'); +COPY t TO '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text'); +COPY t TO '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300)); +COPY t TO '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary'); +COPY t_csv FROM '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv'); +COPY t_text FROM '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text'); +COPY t_fixed FROM '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300)); +COPY t_binary FROM '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary'); +SELECT * FROM t; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 + 6 | 㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓 +(6 rows) + +SELECT * FROM t_csv; + id | c 
+----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 + 6 | 㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓 +(6 rows) + +SELECT * FROM t_text; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 + 6 | 㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓 +(6 rows) + +SELECT * FROM t_fixed; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 + 6 | 㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓 +(6 rows) + +SELECT * FROM t_binary; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 + 6 | 㐁㐂㐃㐄㐅㐆㐆㐇㐈㐉㐋㐌㐍㐎㐏㐐㐑㐒㐓 +(6 rows) + +\d t; + Table "public.t" + Column | Type | Modifiers +--------+-------------------+----------- + id | integer | + c | character varying | + +\d t_csv; + Table "public.t_csv" + Column | Type | Modifiers +--------+-------------------+----------- + id | integer | + c | character varying | + +\d t_text; + 
Table "public.t_text" + Column | Type | Modifiers +--------+-------------------+----------- + id | integer | + c | character varying | + +\d t_fixed; + Table "public.t_fixed" + Column | Type | Modifiers +--------+-------------------+----------- + id | integer | + c | character varying | + +\d t_binary; + Table "public.t_binary" + Column | Type | Modifiers +--------+-------------------+----------- + id | integer | + c | character varying | + diff --git a/src/test/regress/output/copy_gbk_test.source b/src/test/regress/output/copy_gbk_test.source new file mode 100644 index 0000000000..ad6b470ede --- /dev/null +++ b/src/test/regress/output/copy_gbk_test.source @@ -0,0 +1,106 @@ +CREATE DATABASE db_gbk_test TEMPLATE template0 encoding 'GBK' lc_ctype 'zh_CN.GBK' lc_collate 'zh_CN.GBK'; +\c db_gbk_test +create table t (id int,c varchar); +insert into t values (1,'测试'); +insert into t values (2,'测试玕'); +insert into t values (3,'测试3玕测试4'); +insert into t values (4,'玕测试4'); +insert into t values (5,'乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖'); +create table t_stdin (id int,c varchar); +create table t_csv (id int,c varchar); +create table t_text (id int,c varchar); +create table t_fixed (id int,c varchar); +create table t_binary (id int,c varchar); +COPY t TO '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv'); +COPY t TO '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text'); +COPY t TO '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300)); +COPY t TO '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary'); +COPY t_csv FROM '@abs_srcdir@/data/datanode1/t_csv.data' WITH(FORMAT 'csv'); +COPY t_text FROM '@abs_srcdir@/data/datanode1/t_text.data' WITH(FORMAT 'text'); +COPY t_fixed FROM '@abs_srcdir@/data/datanode1/t_fixed.data' fixed formatter(id(0,2), c(2,300)); +COPY t_binary FROM '@abs_srcdir@/data/datanode1/t_binary.data' WITH(FORMAT 'binary'); +SELECT * FROM t; + id | c 
+----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 +(5 rows) + +SELECT * FROM t_csv; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 +(5 rows) + +SELECT * FROM t_text; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 +(5 rows) + +SELECT * FROM t_fixed; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 +(5 rows) + +SELECT * FROM t_binary; + id | c +----+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 1 | 测试 + 2 | 测试玕 + 3 | 测试3玕测试4 + 4 | 玕测试4 + 5 | 乗俓僜刓匼哱圽塡奬媆峔嶾廫慭怽揬昞朶梊榎橽歕沑漒瀄焅燶猏玕琝甛璡痋盶癨瞈砛碶穃竆筡篭糪絓綷縗繺羂耚肻腬臶臷芢蒤薥蚛蝄蟎衆蟎裓覾譢豛赲踈躙輁郳醆鈂鉢鎈鏫閈闬隲頫颸餦馶骪鯸鮘鳿鵟鸤黒齖 +(5 rows) + +\d t; + Table "public.t" + Column | 
Type        | Modifiers 
+--------+-------------------+-----------
+ id     | integer           | 
+ c      | character varying | 
+
+\d t_csv;
+          Table "public.t_csv"
+ Column |       Type        | Modifiers 
+--------+-------------------+-----------
+ id     | integer           | 
+ c      | character varying | 
+
+\d t_text;
+         Table "public.t_text"
+ Column |       Type        | Modifiers 
+--------+-------------------+-----------
+ id     | integer           | 
+ c      | character varying | 
+
+\d t_fixed;
+         Table "public.t_fixed"
+ Column |       Type        | Modifiers 
+--------+-------------------+-----------
+ id     | integer           | 
+ c      | character varying | 
+
+\d t_binary;
+        Table "public.t_binary"
+ Column |       Type        | Modifiers 
+--------+-------------------+-----------
+ id     | integer           | 
+ c      | character varying | 
+
diff --git a/src/test/regress/parallel_schedule0 b/src/test/regress/parallel_schedule0
index b50b8e4590..348f58d823 100644
--- a/src/test/regress/parallel_schedule0
+++ b/src/test/regress/parallel_schedule0
@@ -624,7 +624,10 @@ test: interval tinterval macaddr tstypes comments
 # is concurrent safe.(duplicate)
 # ----------
 test: copyselect copy_error_log copy_support_transform copy_from_support_parallel
 test: copy_new_gram
+# copy_gbk_test and copy_gb18030_test write and re-read the same data/datanode1/t_*.data files; keep them in separate groups
+test: copy_gbk_test
+test: copy_gb18030_test
 #test: copy_eol
 
 # ----------
diff --git a/src/test/regress/parallel_schedule0B b/src/test/regress/parallel_schedule0B
index bb036921b1..6925c8d4b8 100644
--- a/src/test/regress/parallel_schedule0B
+++ b/src/test/regress/parallel_schedule0B
@@ -186,7 +186,10 @@ test: interval tinterval macaddr tstypes comments
 # is concurrent safe.(duplicate)
 # ----------
 test: copyselect copy_error_log copy_support_transform copy_from_support_parallel
 test: copy_new_gram
+# copy_gbk_test and copy_gb18030_test write and re-read the same data/datanode1/t_*.data files; keep them in separate groups
+test: copy_gbk_test
+test: copy_gb18030_test
 #test: copy_eol
 
 # ----------
-- 
Gitee