From e60db0f0ce7f7cf39e0152e5103938d4519a4220 Mon Sep 17 00:00:00 2001 From: laishenghao Date: Fri, 20 Dec 2024 16:20:00 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=89=8D=E7=BC=80=E7=B4=A2?= =?UTF-8?q?=E5=BC=95+in=E6=88=96any=E6=9D=A1=E4=BB=B6=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E7=BB=93=E6=9E=9C=E4=B8=8D=E6=AD=A3=E7=A1=AE=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/backend/utils/cache/lsyscache.cpp | 14 ++ src/gausskernel/optimizer/path/indxpath.cpp | 170 +++++++++++++++++- src/include/utils/lsyscache.h | 1 + src/test/regress/expected/prefixkey_index.out | 159 ++++++++++++++++ src/test/regress/sql/prefixkey_index.sql | 46 +++++ 5 files changed, 386 insertions(+), 4 deletions(-) diff --git a/src/common/backend/utils/cache/lsyscache.cpp b/src/common/backend/utils/cache/lsyscache.cpp index 4204712076..5cc94ab439 100644 --- a/src/common/backend/utils/cache/lsyscache.cpp +++ b/src/common/backend/utils/cache/lsyscache.cpp @@ -1336,6 +1336,20 @@ RegProcedure get_oprrest(Oid opno) } } +void get_operator_types(Oid opno, Oid* leftType, Oid* rightType) +{ + HeapTuple tuple = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tuple)) { + Form_pg_operator opForm = (Form_pg_operator)GETSTRUCT(tuple); + *leftType = opForm->oprleft; + *rightType = opForm->oprright; + ReleaseSysCache(tuple); + } else { + *leftType = InvalidOid; + *rightType = InvalidOid; + } +} + /* * get_oprjoin * diff --git a/src/gausskernel/optimizer/path/indxpath.cpp b/src/gausskernel/optimizer/path/indxpath.cpp index 389f8a17ad..979117846a 100755 --- a/src/gausskernel/optimizer/path/indxpath.cpp +++ b/src/gausskernel/optimizer/path/indxpath.cpp @@ -173,6 +173,9 @@ static RestrictInfo* rewrite_opclause_for_prefixkey( static Const *pad_string_in_like(PadContent content, const Const *strConst, int length, bool isPadMax); static int get_pad_length(Node *leftop, int prefixLen); static 
PadContent get_pad_content(Oid collation); +static bool scalar_array_can_match_prefixkey(Node *saop_rexpr); +RestrictInfo *expand_indexqual_scalar_array_op_expr(IndexOptInfo *index, RestrictInfo *rinfo, + Oid opfamily, int indexcol); void check_report_cause_type(FuncExpr *funcExpr, int indkey); Node* match_first_var_to_indkey(Node* node, int indkey); @@ -2377,6 +2380,7 @@ static bool match_clause_to_indexcol(IndexOptInfo* index, int indexcol, Restrict Oid expr_op; Oid expr_coll; bool plain_op = false; + bool isMatchPrefixKey = true; Assert(indexcol < index->nkeycolumns); opfamily = index->opfamily[indexcol]; @@ -2425,6 +2429,7 @@ static bool match_clause_to_indexcol(IndexOptInfo* index, int indexcol, Restrict expr_op = saop->opno; expr_coll = saop->inputcollid; plain_op = false; + isMatchPrefixKey = scalar_array_can_match_prefixkey(rightop); } else if (clause && IsA(clause, RowCompareExpr)) { return match_rowcompare_to_indexcol(index, indexcol, opfamily, idxcollation, (RowCompareExpr*)clause); } else if (index->amsearchnulls && IsA(clause, NullTest)) { @@ -2440,8 +2445,8 @@ static bool match_clause_to_indexcol(IndexOptInfo* index, int indexcol, Restrict * Check for clauses of the form: (indexkey operator constant) or * (constant operator indexkey). See above notes about const-ness. 
*/ - if (match_index_to_operand(leftop, indexcol, index, true) && !bms_is_member(index_relid, right_relids) && - !contain_volatile_functions(rightop)) { + if (match_index_to_operand(leftop, indexcol, index, isMatchPrefixKey) && + !bms_is_member(index_relid, right_relids) && !contain_volatile_functions(rightop)) { if (IndexCollMatchesExprColl(idxcollation, expr_coll) && is_indexable_operator(expr_op, opfamily, true)) return true; @@ -2457,7 +2462,7 @@ static bool match_clause_to_indexcol(IndexOptInfo* index, int indexcol, Restrict return false; } - if (plain_op && match_index_to_operand(rightop, indexcol, index, true) && + if (plain_op && match_index_to_operand(rightop, indexcol, index, isMatchPrefixKey) && !bms_is_member(index_relid, left_relids) && !contain_volatile_functions(leftop)) { if (IndexCollMatchesExprColl(idxcollation, expr_coll) && is_indexable_operator(expr_op, opfamily, false)) return true; @@ -3514,7 +3519,7 @@ void expand_indexqual_conditions( indexqualcols = lappend_int(indexqualcols, indexcol); } else if (IsA(clause, ScalarArrayOpExpr)) { /* no extra work at this time */ - indexquals = lappend(indexquals, rinfo); + indexquals = lappend(indexquals, expand_indexqual_scalar_array_op_expr(index, rinfo, curFamily, indexcol)); indexqualcols = lappend_int(indexqualcols, indexcol); } else if (IsA(clause, RowCompareExpr)) { indexquals = lappend(indexquals, expand_indexqual_rowcompare(rinfo, index, indexcol)); @@ -4700,3 +4705,160 @@ Node* match_first_var_to_indkey(Node* node, int indkey) } return lastNode; } + +static Node *strip_array_coercion(Node *node) +{ + while (node) { + if (IsA(node, ArrayCoerceExpr) && ((ArrayCoerceExpr*)node)->elemfuncid == InvalidOid) { + node = (Node*)((ArrayCoerceExpr*)node)->arg; + } else if (IsA(node, RelabelType)) { + node = (Node*)((RelabelType*)node)->arg; + } else { + break; + } + } + + return node; +} + +/* To avoid error in expand_indexqual_scalar_array_op_expr, we cannot match prefix key with this condition. 
*/ +static bool scalar_array_can_match_prefixkey(Node *saop_rexpr) +{ + Node* rexpr = strip_array_coercion(saop_rexpr); + if (!rexpr) { + return false; + } + if (IsA(rexpr, Const) || + (IsA(rexpr, ArrayExpr) && !((ArrayExpr*)rexpr)->multidims)) { + return true; + } + + /* Any other node type (e.g. an ArrayCoerceExpr that was not stripped) cannot be rewritten yet. */ + return false; +} + +Datum get_prefix_datum(Datum src, int prefix_len, Oid datatype) +{ + int datum_len; + if (datatype == BYTEAOID || datatype == RAWOID || datatype == BLOBOID) { + /* Binary types: the prefix length is counted in bytes. */ + datum_len = VARSIZE_ANY_EXHDR(DatumGetPointer(src)); + if (prefix_len < datum_len) { + return PointerGetDatum(bytea_substring(src, 1, prefix_len, false)); + } + } else { + /* All other types: the prefix length is counted in characters. */ + datum_len = text_length(src); + if (prefix_len < datum_len) { + return PointerGetDatum(text_substring(src, 1, prefix_len, false)); + } + } + return src; +} + +void prefix_array_const_items(Const* arr_const, int prefixkey_len) +{ + ArrayBuildState* astate = NULL; + Datum arraydatum = arr_const->constvalue; + ArrayType* arrayval = DatumGetArrayTypeP(arraydatum); + Oid elem_type = ARR_ELEMTYPE(arrayval); + int16 typlen; + bool typbyval; + char typalign; + Datum* elem_values = NULL; + bool* elem_nulls = NULL; + int num_elems; + + get_typlenbyvalalign(elem_type, &typlen, &typbyval, &typalign); + deconstruct_array(arrayval, elem_type, typlen, typbyval, typalign, &elem_values, &elem_nulls, &num_elems); + + /* Truncate every non-NULL item to the prefix length; NULL items are accumulated as NULL. */ + Datum new_item; + for (int i = 0; i < num_elems; i++) { + if (elem_nulls[i]) { + new_item = 0; + } else { + new_item = get_prefix_datum(elem_values[i], prefixkey_len, elem_type); + } + astate = accumArrayResult(astate, PointerGetDatum(new_item), elem_nulls[i], elem_type, CurrentMemoryContext); + } + + /* Replace the constant's value with the rebuilt, prefixed array. 
*/ + arr_const->constvalue = makeArrayResult(astate, CurrentMemoryContext); + pfree_ext(elem_nulls); + pfree_ext(elem_values); +} + +void prefix_array_expr_items(ArrayExpr* arr_expr, int prefixkey_len) +{ + Node* chgnode = NULL; + + foreach_cell(lc, arr_expr->elements) { + chgnode = (Node*)lfirst(lc); + if (IsA(chgnode, Const)) { + chgnode = (Node*)prefix_const_node((Const*)chgnode, prefixkey_len, ((Const*)chgnode)->consttype); + } else { + PrefixKey* pexpr = makeNode(PrefixKey); + pexpr->length = prefixkey_len; + pexpr->arg = (Expr*)chgnode; + chgnode = (Node*)pexpr; + } + + lfirst(lc) = chgnode; + } +} + +static Oid replace_operator_by_strategy(Oid op_oid, Oid opfamily, int16 strategy) +{ + Oid ltype; + Oid rtype; + get_operator_types(op_oid, &ltype, &rtype); + return get_opfamily_member(opfamily, ltype, rtype, strategy); +} + +RestrictInfo* expand_indexqual_scalar_array_op_expr(IndexOptInfo* index, RestrictInfo* rinfo, + Oid opfamily, int indexcol) +{ + int prefixkey_len = get_index_column_prefix_lenth(index, indexcol); + /* Not a prefix key, keep original condition. */ + if (prefixkey_len == 0) { + return rinfo; + } + + /* Copy condition as new. Rewrite it later. */ + RestrictInfo* new_rinfo = (RestrictInfo*)copyObject((Node*)rinfo); + ScalarArrayOpExpr* saop = (ScalarArrayOpExpr*)new_rinfo->clause; + Oid op_oid = saop->opno; + Node* rexpr = (Node*)lsecond(saop->args); /* 2nd argument of ScalarArrayOpExpr should not be null */ + + /* Skip relabel and no-op array coercion nodes. */ + rexpr = strip_array_coercion(rexpr); + if (rexpr && IsA(rexpr, Const)) { + prefix_array_const_items((Const*)rexpr, prefixkey_len); + } else if (rexpr && IsA(rexpr, ArrayExpr) && !((ArrayExpr*)rexpr)->multidims) { + prefix_array_expr_items((ArrayExpr*)rexpr, prefixkey_len); + } else { + /* Should not happen. 
*/ + ereport(ERROR, (errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE), + errmsg("unsupported indexqual type when expand indexqual conditions: %d", (int)nodeTag(saop)), + errdetail("Cannot rewrite expression %d for prefix key index.", rexpr ? (int)nodeTag(rexpr) : 0))); + } + + /* + * Operators ">" and "<" may cause required keys to be skipped, because the array items are now only prefixes. + * Replace them with ">=" or "<="; the original clause is still rechecked against the full value. + */ + int strategy = get_op_opfamily_strategy(op_oid, opfamily); + if (strategy == BTGreaterStrategyNumber) { + saop->opno = replace_operator_by_strategy(op_oid, opfamily, BTGreaterEqualStrategyNumber); + } else if (strategy == BTLessStrategyNumber) { + saop->opno = replace_operator_by_strategy(op_oid, opfamily, BTLessEqualStrategyNumber); + } + + if (saop->opno == InvalidOid) { + ereport(ERROR, (errcode(ERRCODE_OPTIMIZER_INCONSISTENT_STATE), + errmsg("no >= or <= operator for opfamily %u when generate indexqual for prefix key", opfamily))); + } + saop->opfuncid = get_opcode(saop->opno); + return new_rinfo; +} diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index a6ac45f3ba..e5aca48a78 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -31,6 +31,7 @@ typedef struct OpBtreeInterpretation { /* I/O function selector for get_type_io_data */ typedef enum IOFuncSelector { IOFunc_input, IOFunc_output, IOFunc_receive, IOFunc_send } IOFuncSelector; +extern void get_operator_types(Oid opno, Oid* leftType, Oid* rightType); extern bool op_in_opfamily(Oid opno, Oid opfamily); extern int get_op_opfamily_strategy(Oid opno, Oid opfamily); extern Oid get_op_opfamily_sortfamily(Oid opno, Oid opfamily); diff --git a/src/test/regress/expected/prefixkey_index.out b/src/test/regress/expected/prefixkey_index.out index 40056fd1cb..031d047c78 100644 --- a/src/test/regress/expected/prefixkey_index.out +++ b/src/test/regress/expected/prefixkey_index.out @@ -3889,5 +3889,164 @@ WHERE T2.c_phone in ('N','L','8','X','u','e','S','2','t','p','C') DROP TABLE IF EXISTS hbom_t; DROP 
TABLE IF EXISTS bmsql_prefixkey; +-- test prefix index with in-expr +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +create table t1( + id integer, + name text, + addr varchar(20) +); +create index it1 on t1 using btree(name(5)); +insert into t1 values (1, '1234567890', '1234567890'); +insert into t1 values (2, '1234567890123', '1234567890123'); +insert into t1 values (3, 'a1234567890', '1234567890'); +insert into t1 values (4, '12345', '12345'); +table t1; + id | name | addr +----+---------------+--------------- + 1 | 1234567890 | 1234567890 + 2 | 1234567890123 | 1234567890123 + 3 | a1234567890 | 1234567890 + 4 | 12345 | 12345 +(4 rows) + +set enable_seqscan=on; +set enable_indexscan=off; +explain (costs off) select * from t1 where name in ('1234567890', '1234567890123'); + QUERY PLAN +--------------------------------------------------------------- + Seq Scan on t1 + Filter: (name = ANY ('{1234567890,1234567890123}'::text[])) +(2 rows) + +select * from t1 where name in ('1234567890', '1234567890123'); -- two + id | name | addr +----+---------------+--------------- + 1 | 1234567890 | 1234567890 + 2 | 1234567890123 | 1234567890123 +(2 rows) + +select * from t1 where name in ('12345', '123456'); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name in ('12345'); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name in ('123456'); -- none + id | name | addr +----+------+------ +(0 rows) + +set enable_seqscan=off; +set enable_indexscan=on; +explain (costs off) select * from t1 where name in ('1234567890', '1234567890123'); + QUERY PLAN +--------------------------------------------------------------- + Bitmap Heap Scan on t1 + Filter: (name = ANY ('{1234567890,1234567890123}'::text[])) + -> Bitmap Index Scan on it1 + Index Cond: (name = ANY ('{12345,12345}'::text[])) +(4 rows) + +select * from t1 where name in ('1234567890', 
'1234567890123'); -- two + id | name | addr +----+---------------+--------------- + 1 | 1234567890 | 1234567890 + 2 | 1234567890123 | 1234567890123 +(2 rows) + +select * from t1 where name in ('12345', '123456'); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name in ('12345'); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name in ('123456'); -- none + id | name | addr +----+------+------ +(0 rows) + +-- test prefix index with any-expr +set enable_seqscan=on; +set enable_indexscan=off; +explain (costs off) select * from t1 where name = any('{1234567890, a1234567890}'::text[]); + QUERY PLAN +------------------------------------------------------------- + Seq Scan on t1 + Filter: (name = ANY ('{1234567890,a1234567890}'::text[])) +(2 rows) + +select * from t1 where name = any('{1234567890, a1234567890}'::text[]); -- two + id | name | addr +----+-------------+------------ + 1 | 1234567890 | 1234567890 + 3 | a1234567890 | 1234567890 +(2 rows) + +select * from t1 where name = any('{12345, 123456}'::text[]); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name = any('{12345}'::text[]); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name = any('{123456}'::text[]); -- none + id | name | addr +----+------+------ +(0 rows) + +set enable_seqscan=off; +set enable_indexscan=on; +explain (costs off) select * from t1 where name = any('{1234567890, a1234567890}'::text[]); + QUERY PLAN +------------------------------------------------------------- + Bitmap Heap Scan on t1 + Filter: (name = ANY ('{1234567890,a1234567890}'::text[])) + -> Bitmap Index Scan on it1 + Index Cond: (name = ANY ('{12345,a1234}'::text[])) +(4 rows) + +select * from t1 where name = any('{1234567890, a1234567890}'::text[]); -- two + id | name | addr +----+-------------+------------ + 1 | 
1234567890 | 1234567890 + 3 | a1234567890 | 1234567890 +(2 rows) + +select * from t1 where name = any('{12345, 123456}'::text[]); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name = any('{12345}'::text[]); -- one + id | name | addr +----+-------+------- + 4 | 12345 | 12345 +(1 row) + +select * from t1 where name = any('{123456}'::text[]); -- none + id | name | addr +----+------+------ +(0 rows) + \c regression drop database prefix_index_db; diff --git a/src/test/regress/sql/prefixkey_index.sql b/src/test/regress/sql/prefixkey_index.sql index be9adc04f4..6dc3dfc118 100644 --- a/src/test/regress/sql/prefixkey_index.sql +++ b/src/test/regress/sql/prefixkey_index.sql @@ -1020,5 +1020,51 @@ WHERE T2.c_phone in ('N','L','8','X','u','e','S','2','t','p','C') DROP TABLE IF EXISTS hbom_t; DROP TABLE IF EXISTS bmsql_prefixkey; +-- test prefix index with in-expr +drop table if exists t1; +create table t1( + id integer, + name text, + addr varchar(20) +); +create index it1 on t1 using btree(name(5)); + +insert into t1 values (1, '1234567890', '1234567890'); +insert into t1 values (2, '1234567890123', '1234567890123'); +insert into t1 values (3, 'a1234567890', '1234567890'); +insert into t1 values (4, '12345', '12345'); +table t1; + +set enable_seqscan=on; +set enable_indexscan=off; +explain (costs off) select * from t1 where name in ('1234567890', '1234567890123'); +select * from t1 where name in ('1234567890', '1234567890123'); -- two +select * from t1 where name in ('12345', '123456'); -- one +select * from t1 where name in ('12345'); -- one +select * from t1 where name in ('123456'); -- none +set enable_seqscan=off; +set enable_indexscan=on; +explain (costs off) select * from t1 where name in ('1234567890', '1234567890123'); +select * from t1 where name in ('1234567890', '1234567890123'); -- two +select * from t1 where name in ('12345', '123456'); -- one +select * from t1 where name in ('12345'); -- one +select * from 
t1 where name in ('123456'); -- none + +-- test prefix index with any-expr +set enable_seqscan=on; +set enable_indexscan=off; +explain (costs off) select * from t1 where name = any('{1234567890, a1234567890}'::text[]); +select * from t1 where name = any('{1234567890, a1234567890}'::text[]); -- two +select * from t1 where name = any('{12345, 123456}'::text[]); -- one +select * from t1 where name = any('{12345}'::text[]); -- one +select * from t1 where name = any('{123456}'::text[]); -- none +set enable_seqscan=off; +set enable_indexscan=on; +explain (costs off) select * from t1 where name = any('{1234567890, a1234567890}'::text[]); +select * from t1 where name = any('{1234567890, a1234567890}'::text[]); -- two +select * from t1 where name = any('{12345, 123456}'::text[]); -- one +select * from t1 where name = any('{12345}'::text[]); -- one +select * from t1 where name = any('{123456}'::text[]); -- none + \c regression drop database prefix_index_db; \ No newline at end of file -- Gitee