Hi @hanson ,
Thank you for looking into this.
I’m trying to merge data from different period into a single table. The destination table (SpotTrade_TS_History) has data being inserted into every 5 min. The source table i’m getting data from has 945,130,366 rows. As you can see below i’m controlling number of rows based on time ranges.
I had done some test on SpotTrade_TS_History table which is supposed to be the final destination table and it was struggling with 200k inserts. So i decided to create a separate table SpotTrade_TS_2021_10 (with no data in it) and insert into it. As you can see it takes 10 min to insert 75,500,543 rows.
We’re using a S4.
Insert ignore into SpotTrade_TS_2021_10 (ExchangeTradeId , TokenId , Price , Size , Pair , Side , ExchangeId ,TStampTraded,TSTradeDate,TStampEdited,TStampInserted) Select ExchangeTradeId , TokenId , Price , Size , Pair , Side , ExchangeId ,TStampTraded ,from_unixtime(TStampTraded) as TSTradeDate,TStampEdited ,TStampInserted FROM SpotTrade_Archive_202110 where TStampTraded >= UNIX_TIMESTAMP(date_format(date_add('2021-10-01 10:00:00', interval 13 day), '%Y-%m-%d 00:00:00')) and TStampTraded < UNIX_TIMESTAMP(date_format(date_add('2021-10-01 10:00:00', interval 13+1 day), '%Y-%m-%d 00:00:00'))
--------------
Query OK, 59321011 rows affected (10 min 59.56 sec)
Records: 0 Duplicates: 0 Warnings: 0
Bye
11/21 21:52:34 Import day
11/21 21:52:34 Starting import day
--------------
Insert ignore into SpotTrade_TS_2021_10 (ExchangeTradeId , TokenId , Price , Size , Pair , Side , ExchangeId ,TStampTraded,TSTradeDate,TStampEdited,TStampInserted) Select ExchangeTradeId , TokenId , Price , Size , Pair , Side , ExchangeId ,TStampTraded ,from_unixtime(TStampTraded) as TSTradeDate,TStampEdited ,TStampInserted FROM SpotTrade_Archive_202110 where TStampTraded >= UNIX_TIMESTAMP(date_format(date_add('2021-10-01 10:00:00', interval 14 day), '%Y-%m-%d 00:00:00')) and TStampTraded < UNIX_TIMESTAMP(date_format(date_add('2021-10-01 10:00:00', interval 14+1 day), '%Y-%m-%d 00:00:00'))
--------------
Query OK, 75500543 rows affected (9 min 23.41 sec)
Records: 0 Duplicates: 0 Warnings: 0
CREATE TABLE SpotTrade_TS_2021_10
(
ExchangeTradeId
varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
TokenId
int(11) NOT NULL,
Price
decimal(20,10) NOT NULL,
Size
decimal(20,10) NOT NULL,
Pair
varchar(15) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
Side
varchar(15) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
ExchangeId
int(11) NOT NULL,
TStampTraded
decimal(25,10) NOT NULL,
TSTradeDate
datetime(6) DEFAULT NULL SERIES TIMESTAMP,
TStampEdited
decimal(25,10) DEFAULT NULL,
TStampInserted
decimal(25,10) NOT NULL,
KEY TStampTraded
(TStampTraded
) USING CLUSTERED COLUMNSTORE,
SHARD KEY idx_SHARDKEY
(TokenId
,ExchangeId
),
KEY idx_TokenId
(TokenId
) USING HASH,
KEY idx_ExchangeId
(ExchangeId
) USING HASH,
KEY idx_Price
(Price
) USING HASH,
KEY idx_Size
(Size
) USING HASH,
KEY idx_Pair
(Pair
) USING HASH,
KEY idx_TSTradeDate
(TSTradeDate
) USING HASH,
UNIQUE KEY TStampTraded_2
(TStampTraded
,TokenId
,ExchangeId
,Pair
,Size
,Price
,ExchangeTradeId
) USING HASH
) AUTOSTATS_CARDINALITY_MODE=INCREMENTAL AUTOSTATS_HISTOGRAM_MODE=CREATE AUTOSTATS_SAMPLING=ON SQL_MODE=‘STRICT_ALL_TABLES’
SpotTrade_Archive_202110
Create Table: CREATE TABLE SpotTrade_Archive_202110
(
ExchangeTradeId
varchar(30) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
TokenId
int(11) NOT NULL,
Price
decimal(20,10) NOT NULL,
Size
decimal(20,10) NOT NULL,
Pair
varchar(15) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
Side
varchar(15) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,
ExchangeId
int(11) NOT NULL,
TStampTraded
decimal(25,10) NOT NULL,
TStampEdited
decimal(25,10) DEFAULT NULL,
TStampInserted
decimal(25,10) NOT NULL,
KEY TStampTraded
(TStampTraded
) USING CLUSTERED COLUMNSTORE,
SHARD KEY idx_SHARDKEY
(TStampTraded
,TokenId
,ExchangeId
,Pair
,Size
,Price
,ExchangeTradeId
),
KEY idx_TokenId
(TokenId
) USING HASH,
KEY idx_ExchangeId
(ExchangeId
) USING HASH
) AUTOSTATS_CARDINALITY_MODE=INCREMENTAL AUTOSTATS_HISTOGRAM_MODE=CREATE AUTOSTATS_SAMPLING=ON SQL_MODE=‘STRICT_ALL_TABLES’