New

Automatically warm up your disk after resuming workspace

Notebook


SingleStore Notebooks

Automatically warm up your disk after resuming workspace

Step 0. Import libraries & create connection to database

In [1]:

1

# Libraries: pandas for assembling the generated warm-up queries,
# singlestoredb for talking to the workspace database.
import pandas as pd
import singlestoredb as s2

# Open a connection to the current workspace and a cursor that every
# subsequent cell uses to run its queries.
s2_conn = s2.connect()
s2_cur = s2_conn.cursor()

Step 1. Specify which database you want to cache

In [2]:

1

# Ask the operator which database's data should be warmed onto disk.
prompt = 'Enter database name:'
database_name = input(prompt)

Step 2. Get a list of the columnstore table names in your database

In [3]:

1

# Fetch the names of all columnstore base tables in the chosen database.
# The database name comes from user input, so it is passed as a bound
# parameter (%s) instead of being interpolated with str.format — this
# prevents SQL injection.
tables_query = """
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = %s
      AND table_type = 'BASE TABLE'
      AND storage_type = 'COLUMNSTORE';
"""
s2_cur.execute(tables_query, (database_name,))
result_df = pd.DataFrame(list(s2_cur))

# If the database has no columnstore tables, result_df has no column 0 and
# result_df[[0]] would raise a KeyError; fall back to an empty list so the
# warm-up loops below simply do nothing.
list_of_tables = result_df[[0]].values if not result_df.empty else []

Step 3. Cache columnar files

In [4]:

1

# Build one warm-up SELECT per table: AVG(LENGTH(`col`)) over every column
# forces each column segment file to be read from object storage onto the
# local disk cache.
column_query_frames = []

for table_name in list_of_tables:
    # Bound parameters (%s) for the schema/table filters and for the table
    # name spliced into the generated FROM clause — avoids SQL injection
    # from the user-supplied database name.
    query = """
        WITH t1 AS (
            SELECT c.column_name,
                   c.ordinal_position,
                   MAX(c.ordinal_position) OVER (ORDER BY c.ordinal_position DESC) AS last_row
            FROM information_schema.columns c
            JOIN information_schema.tables t
              ON c.table_catalog = t.table_catalog
             AND c.table_schema = t.table_schema
             AND c.table_name = t.table_name
            WHERE c.table_schema = %s
              AND c.table_name = %s
        )
        SELECT CASE
                 WHEN ordinal_position = 1 AND ordinal_position = last_row
                   THEN CONCAT('SELECT ', 'AVG(LENGTH(`', column_name, '`)) FROM ', %s)
                 WHEN ordinal_position = 1 AND ordinal_position != last_row
                   THEN CONCAT('SELECT ', 'AVG(LENGTH(`', column_name, '`)),')
                 WHEN ordinal_position != last_row
                   THEN CONCAT('AVG(LENGTH(`', column_name, '`)),')
                 ELSE CONCAT('AVG(LENGTH(`', column_name, '`)) FROM ', %s)
               END AS query_text
        FROM t1
        ORDER BY ordinal_position;
    """
    s2_cur.execute(query, (database_name, table_name[0], table_name[0], table_name[0]))
    result_df = pd.DataFrame(list(s2_cur))
    if result_df.empty:
        # No columns returned (e.g. table dropped concurrently) — skip it
        # rather than failing on the column assignment below.
        continue
    result_df['table_name'] = table_name[0]
    column_query_frames.append(result_df)

# Concatenate once after the loop: repeatedly calling pd.concat inside the
# loop copies the accumulated frame every iteration (quadratic cost).
if column_query_frames:
    column_queries_df = pd.concat(column_query_frames, axis=0)
    column_queries_df.rename(columns={0: 'query_text'}, inplace=True)

    # Stitch each table's SELECT fragments into one statement per table.
    final_column_df = (
        column_queries_df.groupby('table_name')['query_text']
        .apply(' '.join)
        .reset_index()
    )
    final_column_df['query_text'] = final_column_df['query_text'].astype(str) + ';'

    # Run the generated column-file warm-up queries.
    for query in final_column_df[['query_text']].values:
        s2_cur.execute(query[0])
else:
    column_queries_df = pd.DataFrame()
    final_column_df = pd.DataFrame(columns=['table_name', 'query_text'])

Step 4. Cache index files

In [5]:

1

# Generate one "OPTIMIZE TABLE ... WARM BLOB CACHE FOR INDEX ..." statement
# per columnstore hash index, then execute them to pull index files from
# object storage onto disk.
index_query_frames = []

for table_name in list_of_tables:
    # Bound parameters (%s) instead of str.format — the database name is
    # user input, so interpolating it directly would allow SQL injection.
    query = """
        SELECT DISTINCT CONCAT(
            'OPTIMIZE TABLE ', table_name,
            ' WARM BLOB CACHE FOR INDEX `', index_name, '`;'
        )
        FROM information_schema.statistics
        WHERE table_schema = %s
          AND index_type = 'COLUMNSTORE HASH'
          AND table_name = %s;
    """
    s2_cur.execute(query, (database_name, table_name[0]))
    result_df = pd.DataFrame(list(s2_cur))
    if not result_df.empty:
        index_query_frames.append(result_df)

# Single concat after the loop avoids the quadratic grow-in-a-loop pattern;
# an empty frame keeps the execution loop below a no-op when there are no
# hash indexes to warm.
index_queries_df = (
    pd.concat(index_query_frames, axis=0) if index_query_frames else pd.DataFrame()
)

# Run the generated index-file warm-up statements.
for query in index_queries_df.values:
    s2_cur.execute(query[0])

Details


About this Template

Runs through the queries to bring data residing in object storage onto disk for a specified database.

Notebook Icon

This Notebook can be run in Standard and Enterprise deployments.

Tags

advanced, automation, tuning

License

This Notebook has been released under the Apache 2.0 open source license.

See Notebook in action

Launch this notebook in SingleStore and start executing queries instantly.