
SingleStore Notebooks
In [1]:
1
import pandas as pd2
import singlestoredb as s23
s2_conn = s2.connect()4
s2_cur = s2_conn.cursor()
In [2]:
1
database_name = input('Enter database name:')
In [3]:
1
query = """SELECT table_name FROM information_schema.tables WHERE table_schema = '{}' AND table_type = 'BASE TABLE' AND storage_type = 'COLUMNSTORE';""".format(database_name)2
result = s2_cur.execute(query)3
result_df = pd.DataFrame(list(s2_cur))4
list_of_tables = result_df[[0]].values
In [4]:
1
# get queries to warm up columm files2
column_queries_df = pd.DataFrame()3
4
for table_name in list_of_tables:5
query = """ WITH t1 AS (SELECT c.column_name, c.ordinal_position, MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row FROM information_schema.columns c JOIN information_schema.tables t ON c.table_catalog = t.table_catalog AND c.table_schema = t.table_schema AND c.table_name = t.table_name WHERE c.table_schema = '{0}' AND c.table_name = '{1}') SELECT CASE WHEN ordinal_position = 1 AND ordinal_position = last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') WHEN ordinal_position = 1 and ordinal_position != last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)),') WHEN ordinal_position != last_row THEN CONCAT('AVG(LENGTH(`',column_name,'`)),') ELSE CONCAT('AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') END AS query_text FROM t1 ORDER BY ordinal_position; """.format(database_name, table_name[0])6
result = s2_cur.execute(query)7
result_df = pd.DataFrame(list(s2_cur))8
result_df['table_name'] = table_name[0]9
column_queries_df = pd.concat([column_queries_df, result_df], axis=0)10
11
column_queries_df.rename(columns = {0:'query_text'}, inplace = True)12
final_column_df = column_queries_df.groupby('table_name')['query_text'].apply(' '.join).reset_index()13
final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'14
15
# run column file warm up queries16
for query in final_column_df[['query_text']].values:17
s2_cur.execute(""" {} """.format(query[0]))
In [5]:
1
# get queries to warm up index files2
index_queries_df = pd.DataFrame()3
4
for table_name in list_of_tables:5
query = """ SELECT DISTINCT CONCAT("OPTIMIZE TABLE ", table_name, " WARM BLOB CACHE FOR INDEX ", "`", index_name, "`", ";") FROM information_schema.statistics WHERE TABLE_SCHEMA = '{}' AND index_type = 'COLUMNSTORE HASH' AND table_name = '{}'; """.format(database_name, table_name[0])6
result = s2_cur.execute(query)7
result_df = pd.DataFrame(list(s2_cur))8
index_queries_df = pd.concat([index_queries_df, result_df], axis=0)9
10
# run index file warm up queries11
for query in index_queries_df.values:12
s2_cur.execute(""" {} """.format(query[0]))
Runs through the queries to bring data residing in object storage onto disk for a specified database.
This Notebook can be run in Standard and Enterprise deployments.
This Notebook has been released under the Apache 2.0 open source license.
Launch this notebook in SingleStore and start executing queries instantly.