SingleStore Notebooks
New
Automatically warm up your disk after resuming workspace
Notebook
Step 0. Import libraries & create connection to database
In [1]:
# Import the libraries used throughout the notebook and open a connection
# to the SingleStore database (credentials come from the workspace environment).
import pandas as pd
import singlestoredb as s2

s2_conn = s2.connect()
s2_cur = s2_conn.cursor()
Step 1. Specify which database you want to cache
In [2]:
# Prompt for the database whose columnstore data should be warmed onto disk.
database_name = input('Enter database name:')
Step 2. Get a list of the columnstore table names in your database
In [3]:
# Fetch the names of all columnstore base tables in the chosen database.
# Use a parameterized query (%s) instead of str.format so the user-supplied
# database name cannot inject SQL.
query = """
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = %s
      AND table_type = 'BASE TABLE'
      AND storage_type = 'COLUMNSTORE';
"""
s2_cur.execute(query, (database_name,))
result_df = pd.DataFrame(list(s2_cur))

# Guard the empty case: selecting column 0 from an empty frame would raise
# a KeyError; an empty list makes the warm-up loops below no-ops instead.
list_of_tables = result_df[[0]].values if not result_df.empty else []
Step 3. Cache columnar files
In [4]:
# Get queries to warm up column files.
# For each table, generate one statement of the form
#   SELECT AVG(LENGTH(`col1`)), AVG(LENGTH(`col2`)), ... FROM table;
# Reading every column forces its segment files to be fetched from object
# storage into the disk cache.
column_query_frames = []  # accumulate per-table frames; concat once (avoids quadratic concat-in-loop)
for table_name in list_of_tables:
    # Identifiers for the schema/table filters are bound as parameters; the
    # table name also appears as a quoted string literal inside the generated
    # query text, which %s binding produces directly.
    query = """
        WITH t1 AS (
            SELECT c.column_name,
                   c.ordinal_position,
                   MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row
            FROM information_schema.columns c
            JOIN information_schema.tables t
              ON c.table_catalog = t.table_catalog
             AND c.table_schema = t.table_schema
             AND c.table_name = t.table_name
            WHERE c.table_schema = %s
              AND c.table_name = %s
        )
        SELECT CASE
            WHEN ordinal_position = 1 AND ordinal_position = last_row
                THEN CONCAT('SELECT ', 'AVG(LENGTH(`', column_name, '`)) FROM ', %s)
            WHEN ordinal_position = 1 AND ordinal_position != last_row
                THEN CONCAT('SELECT ', 'AVG(LENGTH(`', column_name, '`)),')
            WHEN ordinal_position != last_row
                THEN CONCAT('AVG(LENGTH(`', column_name, '`)),')
            ELSE CONCAT('AVG(LENGTH(`', column_name, '`)) FROM ', %s)
        END AS query_text
        FROM t1
        ORDER BY ordinal_position;
    """
    s2_cur.execute(query, (database_name, table_name[0], table_name[0], table_name[0]))
    result_df = pd.DataFrame(list(s2_cur))
    result_df['table_name'] = table_name[0]
    column_query_frames.append(result_df)

if column_query_frames:
    column_queries_df = pd.concat(column_query_frames, axis=0)
    column_queries_df = column_queries_df.rename(columns={0: 'query_text'})
    # Stitch the per-column fragments back into one SELECT statement per table
    # (rows arrive ordered by ordinal_position, and groupby preserves that order).
    final_column_df = (
        column_queries_df.groupby('table_name')['query_text']
        .apply(' '.join)
        .reset_index()
    )
    final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'

    # Run the column-file warm-up queries.
    for query in final_column_df[['query_text']].values:
        s2_cur.execute(query[0])
else:
    # No columnstore tables: nothing to warm up.
    final_column_df = pd.DataFrame(columns=['table_name', 'query_text'])
Step 4. Cache index files
In [5]:
# Get queries to warm up index files.
# For each table, generate OPTIMIZE TABLE ... WARM BLOB CACHE FOR INDEX `name`;
# statements for every columnstore hash index, then run them.
index_query_frames = []  # accumulate and concat once instead of quadratic concat-in-loop
for table_name in list_of_tables:
    # Single-quoted SQL string literals (double quotes would break under
    # ANSI_QUOTES); schema/table filters bound as parameters to avoid injection.
    query = """
        SELECT DISTINCT
            CONCAT('OPTIMIZE TABLE ', table_name,
                   ' WARM BLOB CACHE FOR INDEX ', '`', index_name, '`', ';')
        FROM information_schema.statistics
        WHERE TABLE_SCHEMA = %s
          AND index_type = 'COLUMNSTORE HASH'
          AND table_name = %s;
    """
    s2_cur.execute(query, (database_name, table_name[0]))
    index_query_frames.append(pd.DataFrame(list(s2_cur)))

index_queries_df = pd.concat(index_query_frames, axis=0) if index_query_frames else pd.DataFrame()

# Run the index-file warm-up queries.
for query in index_queries_df.values:
    s2_cur.execute(query[0])
Details
About this Template
Runs through the queries to bring data residing in object storage onto disk for a specified database.
This Notebook can be run in Standard and Enterprise deployments.
Tags
advanced, automation, tuning
License
This Notebook has been released under the Apache 2.0 open source license.