The reproduction procedure is as follows.
-
I built a MemSQL cluster with two leaf nodes on AWS according to the following document.
Use a valid Enterprise License Key license for one month.
As a result, one aggregator node, one master node, and two leaf nodes are activated.
SingleStoreDB Cloud Β· SingleStore Documentation -
Next, I constructed and populated the table with the following DDL and script.
see DDL & script below. -
Finally, from the Java program, the following query was executed simultaneously from 50 threads at one second intervals.
Then, the CPU load on the master node is nearly 100%.
There was almost no CPU load on the aggregator node, and CPU load on the leaf node was around 10%.
see query below.
Increasing the number of leaf nodes from 2 to 3 or 4 does not change this trend and does not improve performance. (Can not scale out)
Perhaps the Master node is the bottleneck.
Is there any solution?
==
-
DDL
CREATE DATABASE verification;
USE verification;CREATE TABLE Account (
AccountID varchar(50) NOT NULL
, AccountStructureCode varchar(50)
, PRIMARY KEY (AccountID)
);CREATE TABLE AccountStructure (
AccountStructureCode varchar(50) NOT NULL
, AccountStructureName varchar(230)
, ParentAccountStructureCode varchar(50)
, AccountDisplayPriority integer
, AccountStructureLevel integer
, PRIMARY KEY (AccountStructureCode)
);CREATE TABLE TimeRange (
TimeRangeType varchar(50) NOT NULL
, LowRange numeric(7,4) NOT NULL
, HighRange numeric(7,4) NOT NULL
, Label varchar(50)
, TimeRangeDisplayPriority integer
, PRIMARY KEY (TimeRangeType, LowRange, HighRange)
);CREATE TABLE Grid (
AccountID varchar(50) NOT NULL
, Currency varchar(50) NOT NULL
, LargeCategoryType varchar(50) NOT NULL
, MiddleCategoryType varchar(50) NOT NULL
, TimeRange decimal(19,4) NOT NULL
, Grid decimal(19,4) NOT NULL
, SHARD KEY (AccountID) USING CLUSTERED COLUMNSTORE
);CREATE VIEW FlattenedAccount AS
SELECT
b.AccountID AS L8
, L7.AccountStructureCode AS L7
, L6.AccountStructureCode AS L6
, L5.AccountStructureCode AS L5
, L4.AccountStructureCode AS L4
, L3.AccountStructureCode AS L3
, L2.AccountStructureCode AS L2
, L2.ParentAccountStructureCode AS L1
FROM
Account AS b
INNER JOIN AccountStructure AS L7
ON b.AccountStructureCode = L7.AccountStructureCode
INNER JOIN AccountStructure AS L6
ON L7.ParentAccountStructureCode = L6.AccountStructureCode
INNER JOIN AccountStructure AS L5
ON L6.ParentAccountStructureCode = L5.AccountStructureCode
INNER JOIN AccountStructure AS L4
ON L5.ParentAccountStructureCode = L4.AccountStructureCode
INNER JOIN AccountStructure AS L3
ON L4.ParentAccountStructureCode = L3.AccountStructureCode
INNER JOIN AccountStructure AS L2
ON L3.ParentAccountStructureCode = L2.AccountStructureCode
;INSERT INTO TimeRange VALUES(βNormalβ, 0, 1,β1Yβ, 1);
INSERT INTO TimeRange VALUES(βNormalβ, 1, 2,β2Yβ, 2);
β¦INSERT INTO AccountStructure VALUES(βRootβ, βNULLβ, βNULLβ, NULL, 1);
INSERT INTO AccountStructure VALUES(βA1000β, βNULLβ, βAK000β, NULL, 2);
β¦INSERT INTO Account VALUES(β100β, βA1101111β);
INSERT INTO Account VALUES(β101β, βA1101112β);
β¦ -
script (grid.py)
This script generates 768,000 rows.import random
currencies = [βJPYβ, βUSDβ, βAUDβ, βNZDβ, βCADβ, βHKDβ, βCNYβ, βSGDβ, βBRLβ, βZARβ]
accounts = [β100β,β101β,β110β,β111β,β120β,β121β,β130β,β131β,β200β,β201β,β210β,β211β,β220β,β221β,β230β,β231β,β300β,β301β,β310β,β311β,β320β,β321β,β330β,β331β,β400β,β401β,β410β,β411β,β420β,β421β,β430β,β431β,β500β,β501β,β510β,β511β,β520β,β521β,β530β,β531β,β600β,β601β,β610β,β611β,β620β,β621β,β630β,β631β,β700β,β701β,β710β,β711β,β720β,β721β,β730β,β731β,β800β,β801β,β810β,β811β,β820β,β821β,β830β,β831β]
timeRanges = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5, 2.75, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51]for i in range(10):
largeCategoryType = βLargeCategoryTypeβ + str(i+1)
for j in range(4):
middleCategoryType = βMiddleCategoryTypeβ + str(i+1) + β-β + str(j+1)
for k in range(len(currencies)):
currency = currencies[k]
for l in range(int(len(accounts)/2)):
accountId = accounts[l*2+1]
for m in range(len(timeRanges)):
timeRange = timeRanges[m]
value = random.random()
print(β"β + accountId + ββ,ββ + currency + ββ,ββ + largeCategoryType + ββ,ββ + middleCategoryType + β",β + str(timeRange) + β,β + str(value)) -
query
SELECT
grid.LargeCategoryType
, grid.MiddleCategoryType
, tr.Label
, SUM(grid.Grid) as Grid
FROM
Grid grid
INNER JOIN FlattenedAccount a
ON grid.AccountID = a.L8
INNER JOIN TimeRange tr
ON grid.TimeRange >= tr.LowRange
AND grid.TimeRange < tr.HighRange
WHERE
a.L1 = βAK000β
AND grid.Currency = βJPYβ
AND tr.TimeRangeType = βNormalβ
GROUP BY
grid.LargeCategoryType
, grid.MiddleCategoryType
, tr.Label
, tr.TimeRangeDisplayPriority
ORDER BY
grid.LargeCategoryType
, grid.MiddleCategoryType
, tr.TimeRangeDisplayPriority
;