I’m able to repro your failure for query 20 with the below.
Looks like the where s_supplier in
gets pushed down as filter into the subselect. The error message roughly means our distributed planner isn’t sending that subselect to the right leaf nodes - the correlated subselect will need a s_supplier
value from another leaf node.
I think there is likely some rewrite or table schema tweak to make this work, though I am not sure offhand what that might be.
drop all from plancache;
drop database if exists db;
create database db;
use db;
CREATE TABLE supplier (
S_SUPPKEY SERIAL PRIMARY KEY,
S_NAME CHAR(25),
S_ADDRESS VARCHAR(40),
S_NATIONKEY BIGINT NOT NULL, -- references N_NATIONKEY
S_PHONE CHAR(15),
S_ACCTBAL DECIMAL,
S_COMMENT VARCHAR(101)
);
CREATE TABLE nation (
N_NATIONKEY SERIAL PRIMARY KEY,
N_NAME CHAR(25),
N_REGIONKEY BIGINT NOT NULL, -- references R_REGIONKEY
N_COMMENT VARCHAR(152)
);
create table stock (
s_i_id int not null,
s_w_id int not null,
s_quantity smallint,
s_dist_01 char(24),
s_dist_02 char(24),
s_dist_03 char(24),
s_dist_04 char(24),
s_dist_05 char(24),
s_dist_06 char(24),
s_dist_07 char(24),
s_dist_08 char(24),
s_dist_09 char(24),
s_dist_10 char(24),
s_ytd decimal(8,0),
s_order_cnt smallint,
s_remote_cnt smallint,
s_data varchar(50),
PRIMARY KEY(s_w_id, s_i_id),
shard key(s_w_id)
);
-- page 16 of http://www.tpc.org/tpc_documents_current_versions/pdf/tpc-c_v5.11.0.pdf
create table orderline(
ol_o_id int,
ol_d_id int,
ol_w_id int,
ol_number int,
ol_i_id int,
ol_supply_w_id int,
ol_delivery_d datetime default null,
ol_quantity numeric(2),
ol_amount numeric(6, 2),
ol_dist_info char(24),
primary key(ol_w_id, ol_d_id, ol_o_id, ol_number)
);
create table item (
i_id int not null,
i_im_id int,
i_name varchar(24),
i_price decimal(5,2),
i_data varchar(50),
PRIMARY KEY(i_id)
);
select s_name, s_address
from supplier, nation
where s_suppkey in
(select mod(s_i_id * s_w_id, 10000)
from stock, orderline
where s_i_id in
(select i_id
from item
where i_data like 'co%')
and ol_i_id=s_i_id
and ol_delivery_d > '2010-05-23 12:00:00'
group by s_i_id, s_w_id, s_quantity
having 2*s_quantity > sum(ol_quantity))
and s_nationkey = n_nationkey
and n_name = 'Germany'
order by s_name;
-- Found correlated subselect that cannot be rewritten:
-- (`supplier`.`s_suppkey` in
-- (
-- select distinct ((`stock`.`s_i_id`*`stock`.`s_w_id`)%?) as `mod(s_i_id * s_w_id, 10000)`
-- from ((`?`.`stock` as `stock`
-- join `?`.`orderline` as `orderline` )
-- join `?`.`item` as `item` )
-- where ((`stock`.`s_i_id` = `item`.`i_id`)
-- and (`supplier`.`s_suppkey` = ((`stock`.`s_i_id`*`stock`.`s_w_id`)%?))
-- and (`orderline`.`ol_i_id` = `stock`.`s_i_id`)
-- and (`orderline`.`ol_delivery_d` > (?!:>datetime(6) null))
-- and (`item`.`i_data` like ?))
-- group by `stock`.`s_i_id`,
-- `stock`.`s_w_id`,
-- `stock`.`s_quantity`
-- having ((?*`stock`.`s_quantity`) > sum(`orderline`.`ol_quantity`)) )
-- )