I just analyzed a slow query and noticed a HashGroupBy bottleneck.
The Visual Explain feature in MemSQL Studio gives these messages:
encoded_group_by_unsupported: not a columnstore table
encoded_join_unsupported: probe side is not an unordered scan of columnstore
I was’t able to find any documentation regarding the above issues, but wonders why it mentions a join as this query only targets a single columnstore table.
How can I improve the query/schema to avoid the issues?
The Visual Explain feature indicate the first HashGroupBy executes fine but the second targeting remote_0 has some troubles. Is this caused by some data repartitioning?
I tried to add url_id and keyword_id to the clusted columnstore index but without any performance improvement.
The query below has also been optimized from the original query which didn’t have the url_id and keyword_id is not null clauses. Adding those two improved performance.
Without the url_id and keyword_id in the group by the query executes in less than 100 ms.
Query:
select
  keyword_id,
  url_id,
  device,
  date,
  sum(impressions) as impressions
from
  gsc_search_analytics
where
  site_id = 1
  and search_type = "web"
  and device is not null
  and query is not null
  and date between "2019-06-09 00:00:00"
  and "2019-06-23 00:00:00"
  and url_id is not null
  and keyword_id is not null
group by
  keyword_id,
  url_id,
  device,
  date;
Schema:
CREATE TABLE `gsc_search_analytics` (
  `id` bigint(20) unsigned NOT NULL,
  `site_id` bigint(20) unsigned NOT NULL,
  `url_id` char(32) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
  `keyword_id` char(32) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
  `date` date NOT NULL,
  `search_type` varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL,
  `country` varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci DEFAULT NULL,
  `device` varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci DEFAULT NULL,
  `page` varchar(2048) CHARACTER SET utf8 COLLATE utf8_unicode_ci DEFAULT NULL,
  `query` varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci DEFAULT NULL,
  `clicks` int(10) unsigned NOT NULL,
  `impressions` int(10) unsigned NOT NULL,
  `ctr` double NOT NULL,
  `position` double NOT NULL,
KEY `site_id` (`site_id`,`date`,`search_type`,`country`,`device`,`page`,`query`) USING CLUSTERED COLUMNSTORE,
SHARD KEY `date` (`date`,`id`),
FULLTEXT KEY `page` (`page`,`query`) );
Profile:
{ "profile":[ { "executor":"Project", "out":[ { "alias":"", "projection":"remote_0.keyword_id" }, { "alias":"", "projection":"remote_0.url_id" }, { "alias":"", "projection":"remote_0.device" }, { "alias":"", "projection":"remote_0.date" }, { "alias":"", "projection":"impressions" } ], "subselects":[], "actual_row_count":{ "value":300 }, "actual_total_time":{ "value":0 }, "start_time":{ "value":2054 }, "network_traffic":{ "value":25960 }, "network_time":{ "value":0 }, "inputs":[ { "executor":"Top", "limit":"@@SESSION.`sql_select_limit`", "actual_row_count":{ "value":300 }, "actual_total_time":{ "value":0 }, "inputs":[ { "executor":"HashGroupBy", "out":[ { "alias":"impressions", "projection":"SUM(remote_0.impressions)" } ], "groups":[ "remote_0.keyword_id", "remote_0.url_id", "remote_0.device", "remote_0.date" ], "encoded_group_by_unsupported":"not a columnstore table: 0tmp", "encoded_join_unsupported":"probe side is not an unordered scan of columnstore", "subselects":[], "actual_row_count":{ "value":850654 }, "actual_total_time":{ "value":1260 }, "start_time":{ "value":441 }, "memory_usage":{ "value":427819032 }, "inputs":[ { "executor":"Gather", "partitions":"all", "query":"SELECT `gsc_search_analytics`.`keyword_id` AS `keyword_id`, `gsc_search_analytics`.`url_id` AS `url_id`, `gsc_search_analytics`.`device` AS `device`, `gsc_search_analytics`.`date` AS `date`, SUM(`gsc_search_analytics`.`impressions`) AS `impressions` FROM `laravel_0`.`gsc_search_analytics` as `gsc_search_analytics` WHERE ((`gsc_search_analytics`.`site_id` = 1) AND (NOT ISNULL(`gsc_search_analytics`.`device`)) AND (NOT ISNULL(`gsc_search_analytics`.`query`)) AND (`gsc_search_analytics`.`date` BETWEEN '2019-06-09 00:00:00' AND '2019-06-23 00:00:00') AND (NOT ISNULL(`gsc_search_analytics`.`url_id`)) AND (NOT ISNULL(`gsc_search_analytics`.`keyword_id`)) AND (`gsc_search_analytics`.`search_type` = 'web')) GROUP BY 1, 2, 3, 4 OPTION(NO_QUERY_REWRITE=1, INTERPRETER_MODE=INTERPRET_FIRST)", "alias":"remote_0", "actual_row_count":{ "value":850654 }, "actual_total_time":{ "value":0 }, "start_time":{ "value":2054 }, "end_time":{ "value":2054 }, "inputs":[ { "executor":"Project", "out":[ { "alias":"", "projection":"gsc_search_analytics.keyword_id" }, { "alias":"", "projection":"gsc_search_analytics.url_id" }, { "alias":"", "projection":"gsc_search_analytics.device" }, { "alias":"", "projection":"gsc_search_analytics.date" }, { "alias":"", "projection":"impressions" } ], "subselects":[], "actual_row_count":{ "value":850654, "avg":212663.500000, "stddev":219.824362, "max":212850, "maxPartition":0 }, "actual_total_time":{ "value":247, "avg":242.500000, "stddev":4.500000, "max":247, "maxPartition":0 }, "start_time":{ "value":439, "avg":452.500000, "stddev":0.000000, "max":469, "maxPartition":3 }, "network_traffic":{ "value":89550478, "avg":22387619.500000, "stddev":23762.370899, "max":22407954, "maxPartition":0 }, "network_time":{ "value":1315, "avg":1296.000000, "stddev":19.000000, "max":1315, "maxPartition":0 }, "inputs":[ { "executor":"HashGroupBy", "out":[ { "alias":"impressions", "projection":"SUM(gsc_search_analytics.impressions)" } ], "groups":[ "gsc_search_analytics.keyword_id", "gsc_search_analytics.url_id", "gsc_search_analytics.device", "gsc_search_analytics.date" ], "subselects":[], "actual_row_count":{ "value":850654, "avg":212663.500000, "stddev":219.824362, "max":212850, "maxPartition":0 }, "actual_total_time":{ "value":186, "avg":183.000000, "stddev":3.000000, "max":186, "maxPartition":0 }, "start_time":{ "value":13, "avg":16.000000, "stddev":0.000000, "max":20, "maxPartition":3 }, "memory_usage":{ "value":408551488, "avg":102137872.000000, "stddev":108679.161130, "max":102236176, "maxPartition":0 }, "inputs":[ { "executor":"Filter", "condition":[ "gsc_search_analytics.site_id = ? AND gsc_search_analytics.device IS NOT NULL AND gsc_search_analytics.query IS NOT NULL AND gsc_search_analytics.date >= ? AND gsc_search_analytics.date <= ? AND gsc_search_analytics.url_id IS NOT NULL AND gsc_search_analytics.keyword_id IS NOT NULL AND gsc_search_analytics.search_type = ?" ], "subselects":[], "actual_row_count":{ "value":850654, "avg":212663.500000, "stddev":219.824362, "max":212850, "maxPartition":0 }, "actual_total_time":{ "value":7, "avg":5.500000, "stddev":1.500000, "max":7, "maxPartition":1 }, "start_time":{ "value":7, "avg":8.500000, "stddev":0.000000, "max":10, "maxPartition":1 }, "inputs":[ { "executor":"ColumnStoreScan", "db":"laravel", "table":"gsc_search_analytics", "alias":"gsc_search_analytics", "index":"KEY site_id (site_id, date, search_type, country, device, page, query) USING CLUSTERED COLUMNSTORE", "storage":"columnar", "est_table_rows":"1291561255", "est_filtered":"1", "actual_row_count":{ "value":9216000, "avg":2304000.000000, "stddev":443405.006738, "max":3072000, "maxPartition":3 }, "actual_total_time":{ "value":256, "avg":253.500000, "stddev":2.500000, "max":256, "maxPartition":1 }, "start_time":{ "value":0, "avg":0.750000, "stddev":0.000000, "max":1, "maxPartition":1 }, "memory_usage":{ "value":11010048, "avg":2752512.000000, "stddev":0.000000, "max":2752512, "maxPartition":0 }, "segments_scanned":{ "value":9, "avg":2.250000, "stddev":0.433013, "max":3, "maxPartition":3 }, "segments_skipped":{ "value":1260, "avg":315.000000, "stddev":0.000000, "max":315, "maxPartition":0 }, "segments_fully_contained":{ "value":0, "avg":0.000000, "stddev":0.000000, "max":0, "maxPartition":0 }, "segments_filter_encoded_data":{ "value":9, "avg":2.250000, "stddev":0.433013, "max":3, "maxPartition":3 }, "segments_encoded_group_by":{ "value":0, "avg":0.000000, "stddev":0.000000, "max":0, "maxPartition":0 }, "encoded_group_by_bailout":{ "Number of segments that reached dynamic dictionary size limit":"9" }, "inputs":[] } ] } ] } ] } ] } ] } ] } ] } ], "version":"2", "info":{ "memsql_version":"6.8.1", "memsql_version_hash":"d3d220c2727a9c1574b748f4ef8481830a4c766d", "num_online_leaves":"2", "num_online_aggs":"1", "context_database":"laravel" } }