From eb4bbbe84e8b02df6beb4f59c92e4568b74f1e41 Mon Sep 17 00:00:00 2001 From: everypaopao <13126797781@163.com> Date: Sat, 13 Aug 2022 22:41:06 +0800 Subject: [PATCH] add SQL Server table statistics --- hetu-docs/en/connector/sqlserver.md | 19 ++ hetu-docs/zh/connector/sqlserver.md | 19 ++ presto-sqlserver/pom.xml | 5 + .../plugin/sqlserver/SqlServerClient.java | 210 +++++++++++++++++- .../sqlserver/SqlServerClientModule.java | 1 + .../plugin/sqlserver/SqlServerConfig.java | 33 +++ 6 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerConfig.java diff --git a/hetu-docs/en/connector/sqlserver.md b/hetu-docs/en/connector/sqlserver.md index f60705f3d..076a3df4e 100644 --- a/hetu-docs/en/connector/sqlserver.md +++ b/hetu-docs/en/connector/sqlserver.md @@ -22,6 +22,25 @@ The SQL Server connector can only access a single database within a SQL Server s To add another catalog, simply add another properties file to `etc/catalog` with a different name (making sure it ends in `.properties`). For example, if you name the property file `sales.properties`, openLooKeng will create a catalog named `sales` using the configured connector. +Table statistics +---------------------- +The SQL Server connector can use table and column statistics for cost based optimizations, to improve query processing performance based on the actual data in the data source. + +``` properties +sqlserver.statistics.enabled=true +#true indicates that statistics is enabled, and false indicates that statistics is disabled, The default is false. +``` + +The SQL Server connector can use information stored in single-column statistics. SQL Server Database can automatically create column statistics for certain columns. If column statistics are not created automatically for a certain column, you can create them by executing the following statement in SQL Server Database. + + CREATE STATISTICS my_statistics_name ON table_schema.table_name (column_name); + +SQL Server Database routinely updates the statistics. In some cases, you may want to force statistics update (e.g. after defining new column statistics or after changing data in the table). You can do that by executing the following statement in SQL Server Database. + + UPDATE STATISTICS table_schema.table_name; + +Refer to SQL Server documentation for information about options, limitations and additional considerations. + Querying SQL Server ------------------- diff --git a/hetu-docs/zh/connector/sqlserver.md b/hetu-docs/zh/connector/sqlserver.md index b43cd4fa2..6cc718f9b 100644 --- a/hetu-docs/zh/connector/sqlserver.md +++ b/hetu-docs/zh/connector/sqlserver.md @@ -20,6 +20,25 @@ SQL Server连接器只能访问SQL Server服务器中的单个数据库。因此 要添加另一个目录,只需添加另一个属性文件到具有不同名称的`etc/catalog`中(确保它以`.properties`结尾)。例如,如果将属性文件命名为`sales.properties`,openLooKeng将使用配置的连接器创建一个名为`sales`的目录。 +### 表统计信息 + +SQL Server连接器可以使用表和列统计信息进行基于成本的优化,以根据数据源中的实际数据提高查询处理性能。 + +``` properties +sqlserver.statistics.enabled=true +#true表示启用了统计信息,false表示禁用了统计信息,默认为false。 +``` + +SQL Server连接器可以使用存储的单列统计信息。SQL Server数据库可以自动为某些列创建列统计信息。如果某个列的列统计信息不是自动创建的,您可以通过在SQL Server数据库中执行以下语句来创建它们。 + + CREATE STATISTICS my_statistics_name ON table_schema.table_name (column_name); + +SQL Server数据库会定期更新统计信息。在某些情况下,您可能希望强制更新统计信息(例如,在定义新列统计信息后或在更改表中的数据后)。您可以通过在SQL Server数据库中执行以下语句来实现这一点。 + + UPDATE STATISTICS table_schema.table_name; + +有关选项、限制和其他注意事项的信息,请参阅SQL Server文档。 + ## 查询SQL Server服务器 SQL Server连接器提供对配置数据库中指定用户可见的所有模式的访问。对于以下示例,假设SQL Server目录为`sqlserver`。 diff --git a/presto-sqlserver/pom.xml b/presto-sqlserver/pom.xml index 7fd2865fc..82d6e326d 100644 --- a/presto-sqlserver/pom.xml +++ b/presto-sqlserver/pom.xml @@ -41,6 +41,11 @@ mssql-jdbc + + org.jdbi + jdbi3-core + + javax.inject javax.inject diff --git a/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClient.java b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClient.java index 06328d7a5..85adf390f 100644 --- a/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClient.java +++ b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClient.java @@ -25,22 +25,37 @@ import io.prestosql.plugin.jdbc.JdbcTypeHandle; import io.prestosql.plugin.jdbc.StatsCollecting; import io.prestosql.plugin.jdbc.WriteMapping; import io.prestosql.spi.PrestoException; +import io.prestosql.spi.connector.ColumnHandle; import io.prestosql.spi.connector.ConnectorSession; import io.prestosql.spi.connector.SchemaTableName; import io.prestosql.spi.predicate.Domain; +import io.prestosql.spi.predicate.TupleDomain; +import io.prestosql.spi.statistics.ColumnStatistics; +import io.prestosql.spi.statistics.Estimate; +import io.prestosql.spi.statistics.TableStatistics; import io.prestosql.spi.type.CharType; import io.prestosql.spi.type.Type; import io.prestosql.spi.type.VarcharType; +import org.jdbi.v3.core.Handle; +import org.jdbi.v3.core.Jdbi; import javax.inject.Inject; +import java.sql.CallableStatement; import java.sql.Connection; +import java.sql.ResultSet; import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.function.BiFunction; import java.util.function.UnaryOperator; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.collect.MoreCollectors.toOptional; import static io.prestosql.plugin.jdbc.JdbcErrorCode.JDBC_ERROR; import static io.prestosql.plugin.jdbc.StandardColumnMappings.booleanWriteFunction; import static io.prestosql.plugin.jdbc.StandardColumnMappings.charWriteFunction; @@ -48,11 +63,13 @@ import static io.prestosql.plugin.jdbc.StandardColumnMappings.varcharWriteFuncti import static io.prestosql.spi.type.BooleanType.BOOLEAN; import static io.prestosql.spi.type.Varchars.isVarcharType; import static java.lang.String.format; +import static java.util.Objects.requireNonNull; public class SqlServerClient extends BaseJdbcClient { private static final Joiner DOT_JOINER = Joiner.on("."); + private final boolean statisticsEnabled; // SqlServer supports 2100 parameters in prepared statement, let's create a space for about 4 big IN predicates private static final int SQL_SERVER_MAX_LIST_EXPRESSIONS = 500; @@ -66,9 +83,10 @@ public class SqlServerClient }; @Inject - public SqlServerClient(BaseJdbcConfig config, @StatsCollecting ConnectionFactory connectionFactory) + public SqlServerClient(BaseJdbcConfig config, SqlServerConfig sqlServerConfig, @StatsCollecting ConnectionFactory connectionFactory) { super(config, "\"", connectionFactory); + this.statisticsEnabled = requireNonNull(sqlServerConfig, "statisticsConfig is null").isStatisticsEnabled(); } @Override @@ -173,4 +191,194 @@ public class SqlServerClient { return "\'" + literal + "\'"; } + + @Override + public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, TupleDomain tupleDomain) + { + if (!statisticsEnabled) { + return TableStatistics.empty(); + } + + try { + return readTableStatistics(session, handle); + } + catch (SQLException | RuntimeException e) { + throwIfInstanceOf(e, PrestoException.class); + throw new PrestoException(JDBC_ERROR, "Failed fetching statistics for table: " + handle, e); + } + } + + private TableStatistics readTableStatistics(ConnectorSession session, JdbcTableHandle table) + throws SQLException + { + try (Connection connection = connectionFactory.openConnection(JdbcIdentity.from(session)); Handle handle = Jdbi.open(connection)) { + String catalog = table.getCatalogName(); + String schema = table.getSchemaName(); + String tableName = table.getTableName(); + + StatisticsDao statisticsDao = new StatisticsDao(handle); + Long tableObjectId = statisticsDao.getTableObjectId(catalog, schema, tableName); + if (tableObjectId == null) { + // Table not found + return TableStatistics.empty(); + } + + Long rowCount = statisticsDao.getRowCount(tableObjectId); + if (rowCount == null) { + // Table disappeared + return TableStatistics.empty(); + } + + if (rowCount == 0) { + return TableStatistics.empty(); + } + + TableStatistics.Builder tableStatistics = TableStatistics.builder(); + tableStatistics.setRowCount(Estimate.of(rowCount)); + + Map columnNameToStatisticsName = getColumnNameToStatisticsName(table, statisticsDao, tableObjectId); + + for (JdbcColumnHandle column : this.getColumns(session, table)) { + String statisticName = columnNameToStatisticsName.get(column.getColumnName()); + if (statisticName == null) { + // No statistic for column + continue; + } + + double averageColumnLength; + long notNullValues = 0; + long nullValues = 0; + long distinctValues = 0; + + try (CallableStatement showStatistics = handle.getConnection().prepareCall("DBCC SHOW_STATISTICS (?, ?)")) { + showStatistics.setString(1, format("%s.%s.%s", catalog, schema, tableName)); + showStatistics.setString(2, statisticName); + + boolean isResultSet = showStatistics.execute(); + checkState(isResultSet, "Expected SHOW_STATISTICS to return a result set"); + try (ResultSet resultSet = showStatistics.getResultSet()) { + checkState(resultSet.next(), "No rows in result set"); + + averageColumnLength = resultSet.getDouble("Average Key Length"); // NULL values are accounted for with length 0 + + checkState(!resultSet.next(), "More than one row in result set"); + } + + isResultSet = showStatistics.getMoreResults(); + checkState(isResultSet, "Expected SHOW_STATISTICS to return second result set"); + showStatistics.getResultSet().close(); + + isResultSet = showStatistics.getMoreResults(); + checkState(isResultSet, "Expected SHOW_STATISTICS to return third result set"); + try (ResultSet resultSet = showStatistics.getResultSet()) { + while (resultSet.next()) { + resultSet.getObject("RANGE_HI_KEY"); + if (resultSet.wasNull()) { + // Null fraction + checkState(resultSet.getLong("RANGE_ROWS") == 0, "Unexpected RANGE_ROWS for null fraction"); + checkState(resultSet.getLong("DISTINCT_RANGE_ROWS") == 0, "Unexpected DISTINCT_RANGE_ROWS for null fraction"); + checkState(nullValues == 0, "Multiple null fraction entries"); + nullValues += resultSet.getLong("EQ_ROWS"); + } + else { + notNullValues += resultSet.getLong("RANGE_ROWS") // rows strictly within a bucket + + resultSet.getLong("EQ_ROWS"); // rows equal to RANGE_HI_KEY + distinctValues += resultSet.getLong("DISTINCT_RANGE_ROWS") // NDV strictly within a bucket + + (resultSet.getLong("EQ_ROWS") > 0 ? 1 : 0); + } + } + } + } + + ColumnStatistics statistics = ColumnStatistics.builder() + .setNullsFraction(Estimate.of( + (notNullValues + nullValues == 0) + ? 1 + : (1.0 * nullValues / (notNullValues + nullValues)))) + .setDistinctValuesCount(Estimate.of(distinctValues)) + .setDataSize(Estimate.of(rowCount * averageColumnLength)) + .build(); + + tableStatistics.setColumnStatistics(column, statistics); + } + + return tableStatistics.build(); + } + } + + private static Map getColumnNameToStatisticsName(JdbcTableHandle table, StatisticsDao statisticsDao, Long tableObjectId) + { + List singleColumnStatistics = statisticsDao.getSingleColumnStatistics(tableObjectId); + + Map columnNameToStatisticsName = new HashMap<>(); + for (String statisticName : singleColumnStatistics) { + String columnName = statisticsDao.getSingleColumnStatisticsColumnName(tableObjectId, statisticName); + if (columnName == null) { + // Table or statistics disappeared + continue; + } + } + return columnNameToStatisticsName; + } + + private static class StatisticsDao + { + private final Handle handle; + + public StatisticsDao(Handle handle) + { + this.handle = requireNonNull(handle, "handle is null"); + } + + Long getTableObjectId(String catalog, String schema, String tableName) + { + return handle.createQuery("SELECT object_id(:table)") + .bind("table", format("%s.%s.%s", catalog, schema, tableName)) + .mapTo(Long.class) + .findOnly(); + } + + Long getRowCount(long tableObjectId) + { + return handle.createQuery("" + + "SELECT sum(rows) row_count " + + "FROM sys.partitions " + + "WHERE object_id = :object_id " + + "AND index_id IN (0, 1)") // 0 = heap, 1 = clustered index, 2 or greater = non-clustered index + .bind("object_id", tableObjectId) + .mapTo(Long.class) + .findOnly(); + } + + List getSingleColumnStatistics(long tableObjectId) + { + return handle.createQuery("" + + "SELECT s.name " + + "FROM sys.stats AS s " + + "JOIN sys.stats_columns AS sc ON s.object_id = sc.object_id AND s.stats_id = sc.stats_id " + + "WHERE s.object_id = :object_id " + + "GROUP BY s.name " + + "HAVING count(*) = 1 " + + "ORDER BY s.name") + .bind("object_id", tableObjectId) + .mapTo(String.class) + .list(); + } + + String getSingleColumnStatisticsColumnName(long tableObjectId, String statisticsName) + { + return handle.createQuery("" + + "SELECT c.name " + + "FROM sys.stats AS s " + + "JOIN sys.stats_columns AS sc ON s.object_id = sc.object_id AND s.stats_id = sc.stats_id " + + "JOIN sys.columns AS c ON sc.object_id = c.object_id AND c.column_id = sc.column_id " + + "WHERE s.object_id = :object_id " + + "AND s.name = :statistics_name") + .bind("object_id", tableObjectId) + .bind("statistics_name", statisticsName) + .mapTo(String.class) + .collect(toOptional()) + .orElse(null); + } + } } diff --git a/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClientModule.java b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClientModule.java index 1bd903628..a94e3b35d 100644 --- a/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClientModule.java +++ b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerClientModule.java @@ -34,6 +34,7 @@ public class SqlServerClientModule { binder.bind(JdbcClient.class).to(SqlServerClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(BaseJdbcConfig.class); + configBinder(binder).bindConfig(SqlServerConfig.class); } @Provides diff --git a/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerConfig.java b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerConfig.java new file mode 100644 index 000000000..f814fc4b6 --- /dev/null +++ b/presto-sqlserver/src/main/java/io/prestosql/plugin/sqlserver/SqlServerConfig.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.plugin.sqlserver; + +import io.airlift.configuration.Config; + +public class SqlServerConfig +{ + private boolean statisticsEnabled; + + public boolean isStatisticsEnabled() + { + return statisticsEnabled; + } + + @Config("sqlserver.statistics.enabled") + public SqlServerConfig setStatisticsEnabled(boolean statisticsEnabled) + { + this.statisticsEnabled = statisticsEnabled; + return this; + } +} -- Gitee