add snowflake caching :snowflake:

This commit is contained in:
dannydannydanny 2023-08-16 11:20:30 +02:00 committed by GitHub
parent c0a4e6eee3
commit 76e69d0899
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,3 +1,46 @@
## database file-caching
```
from pathlib import Path
from snowflake.connector import connect
import pandas as pd
import hashlib
def get_df(query, env, use_cache=True):
    """Run ``query`` against Snowflake and return the result as a DataFrame.

    Results are pickled under ``/tmp``, keyed by ``env`` and a short hash of
    the query text, so repeating the same query skips the database round trip.

    Args:
        query: SQL text to execute; also the input to the cache key.
        env: Target environment, either ``"prod"`` or ``"sand"``. Selects the
            role and warehouse, and is interpolated into the account name.
        use_cache: When true (the default), return the pickled result if one
            exists. When false, always run the query; the cache file is still
            (re)written afterwards.

    Returns:
        The result of ``cursor.fetch_pandas_all()``, or the previously pickled
        copy of it when the cache is used.

    Raises:
        ValueError: If ``env`` is neither ``"prod"`` nor ``"sand"``.
    """
    print(f'{query[:100] = }')

    if env == "prod":
        # NOTE(review): "COSUTMER" looks like a misspelling of "CUSTOMER" (the
        # role used for sand) -- confirm against the Snowflake account before
        # changing it; it is kept as-is here.
        role = "COSUTMER"
        warehouse = "CLOWN_COSTUMES"
    elif env == "sand":
        role = "CUSTOMER"
        warehouse = "CUSTOMER_INFORMATION"
    else:
        raise ValueError(f"env must be prod or sand - received: {env}")

    # hexdigest(4) yields 4 bytes, i.e. 8 hex characters. Two different
    # queries that collide on this digest would share one cache file.
    query_digest = hashlib.shake_128(query.encode()).hexdigest(4)
    cache_pkl_path = Path(f"/tmp/cache-{env}-{query_digest}.pkl")
    print(cache_pkl_path.exists(), cache_pkl_path)

    if use_cache and cache_pkl_path.exists():
        print('loading cached data')
        # SECURITY: unpickling executes arbitrary code and /tmp is usually
        # writable by every local user -- only use this on a trusted machine.
        return pd.read_pickle(cache_pkl_path)

    print("fetching & caching data")
    with connect(
        authenticator="externalbrowser",
        user="dnth@lundbeck.com",
        account=f"lundbeck-hlu{env}",
        role=role,
        warehouse=warehouse,
    ) as con:
        # Close the cursor deterministically instead of leaving it open until
        # the connection itself is torn down.
        with con.cursor() as cur:
            cur.execute(query)
            df = cur.fetch_pandas_all()

    # Written even when use_cache is False, so a forced refresh also updates
    # the cached copy.
    df.to_pickle(cache_pkl_path)
    return df
```
## Python Kernel Management
```
# install new kernel "my_new_env"