New

Automatically warm up your disk after resuming workspace

Notebook


SingleStore Notebooks

Automatically warm up your disk after resuming workspace

Step 0. Import libraries & create connection to database

In [1]:

import pandas as pd
import singlestoredb as s2
s2_conn = s2.connect()
s2_cur = s2_conn.cursor()

Step 1. Specify which database you want to cache

In [2]:

database_name = input('Enter database name:')

Step 2. Get a list of the columnstore table names in your database

In [3]:

query = """SELECT table_name FROM information_schema.tables WHERE table_schema = '{}' AND table_type = 'BASE TABLE' AND storage_type = 'COLUMNSTORE';""".format(database_name)
result = s2_cur.execute(query)
result_df = pd.DataFrame(list(s2_cur))
list_of_tables = result_df[[0]].values

Step 3. Cache columnar files

In [4]:

# get queries to warm up columm files
column_queries_df = pd.DataFrame()
for table_name in list_of_tables:
query = """ WITH t1 AS (SELECT c.column_name, c.ordinal_position, MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row FROM information_schema.columns c JOIN information_schema.tables t ON c.table_catalog = t.table_catalog AND c.table_schema = t.table_schema AND c.table_name = t.table_name WHERE c.table_schema = '{0}' AND c.table_name = '{1}') SELECT CASE WHEN ordinal_position = 1 AND ordinal_position = last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') WHEN ordinal_position = 1 and ordinal_position != last_row THEN CONCAT('SELECT ', 'AVG(LENGTH(`',column_name,'`)),') WHEN ordinal_position != last_row THEN CONCAT('AVG(LENGTH(`',column_name,'`)),') ELSE CONCAT('AVG(LENGTH(`',column_name,'`)) FROM ', '{1}') END AS query_text FROM t1 ORDER BY ordinal_position; """.format(database_name, table_name[0])
result = s2_cur.execute(query)
result_df = pd.DataFrame(list(s2_cur))
result_df['table_name'] = table_name[0]
column_queries_df = pd.concat([column_queries_df, result_df], axis=0)
column_queries_df.rename(columns = {0:'query_text'}, inplace = True)
final_column_df = column_queries_df.groupby('table_name')['query_text'].apply(' '.join).reset_index()
final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'
# run column file warm up queries
for query in final_column_df[['query_text']].values:
s2_cur.execute(""" {} """.format(query[0]))

Step 4. Cache index files

In [5]:

# get queries to warm up index files
index_queries_df = pd.DataFrame()
for table_name in list_of_tables:
query = """ SELECT DISTINCT CONCAT("OPTIMIZE TABLE ", table_name, " WARM BLOB CACHE FOR INDEX ", "`", index_name, "`", ";") FROM information_schema.statistics WHERE TABLE_SCHEMA = '{}' AND index_type = 'COLUMNSTORE HASH' AND table_name = '{}'; """.format(database_name, table_name[0])
result = s2_cur.execute(query)
result_df = pd.DataFrame(list(s2_cur))
index_queries_df = pd.concat([index_queries_df, result_df], axis=0)
# run index file warm up queries
for query in index_queries_df.values:
s2_cur.execute(""" {} """.format(query[0]))

Details


About this Template

Runs through the queries to bring data residing in object storage onto disk for a specified database.

Notebook Icon

This Notebook can be run in Standard and Enterprise deployments.

Tags

advancedautomationtuning

License

This Notebook has been released under the Apache 2.0 open source license.