new files for commissioning
This commit is contained in:
parent
a9ff3880e2
commit
9b7bb945bc
11
predict_pipeline/check_python_version.py
Normal file
11
predict_pipeline/check_python_version.py
Normal file
@ -0,0 +1,11 @@
|
||||
# from tools import db_helpers
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Print the version string of the running Python interpreter."""
    print(sys.version)
    # Disabled, like the db_helpers import at the top of this file.
    # db_helpers.add_columns_to_table()
|
||||
|
||||
|
||||
# Run the version check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|
||||
211
predict_pipeline/fill_db.ipynb
Normal file
211
predict_pipeline/fill_db.ipynb
Normal file
@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d70a13f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')\n",
|
||||
"import pandas as pd\n",
|
||||
"from pathlib import Path\n",
|
||||
"import db_helpers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce696366",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
|
||||
"parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1aa9398",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_parquet(parquet_path)\n",
|
||||
"dataset.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b183746e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "24ed769d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"con, cursor = db_helpers.connect_db(database_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e604ed30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_clean = dataset.drop(columns=['subjectID','rowID', 'STUDY', 'LEVEL', 'PHASE'])\n",
|
||||
"df_first_100 = df_clean.head(200)\n",
|
||||
"df_first_100 = df_first_100.reset_index(drop=True)\n",
|
||||
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e77a812e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def pandas_to_sqlite_dtype(dtype):\n",
|
||||
" if pd.api.types.is_integer_dtype(dtype):\n",
|
||||
" return \"INTEGER\"\n",
|
||||
" if pd.api.types.is_float_dtype(dtype):\n",
|
||||
" return \"REAL\"\n",
|
||||
" if pd.api.types.is_bool_dtype(dtype):\n",
|
||||
" return \"INTEGER\"\n",
|
||||
" if pd.api.types.is_datetime64_any_dtype(dtype):\n",
|
||||
" return \"TEXT\"\n",
|
||||
" return \"TEXT\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e8897b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns = {\n",
|
||||
" col: pandas_to_sqlite_dtype(dtype)\n",
|
||||
" for col, dtype in df_first_100.dtypes.items()\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"constraints = {\n",
|
||||
" \"_Id\": [\"NOT NULL\"]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"primary_key = {\n",
|
||||
" \"pk_df_first_100\": [\"_Id\"]\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ab57624",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sql = db_helpers.create_table(\n",
|
||||
" conn=con,\n",
|
||||
" cursor=cursor,\n",
|
||||
" table_name=\"rawdata\",\n",
|
||||
" columns=columns,\n",
|
||||
" constraints=constraints,\n",
|
||||
" primary_key=primary_key,\n",
|
||||
" commit=True\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "25096a7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns_to_insert = {\n",
|
||||
" col: df_first_100[col].tolist()\n",
|
||||
" for col in df_first_100.columns\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a5a3aa8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db_helpers.insert_rows_into_table(\n",
|
||||
" conn=con,\n",
|
||||
" cursor=cursor,\n",
|
||||
" table_name=\"rawdata\",\n",
|
||||
" columns=columns_to_insert,\n",
|
||||
" commit=True\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b56beae2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4a74a9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da0f8737",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db_helpers.disconnect_db(con, cursor)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "MSY_FS_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user