212 lines
4.6 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0d70a13f",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import db_helpers"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce696366",
"metadata": {},
"outputs": [],
"source": [
"database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
"parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1aa9398",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_parquet(parquet_path)\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b183746e",
"metadata": {},
"outputs": [],
"source": [
"dataset.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24ed769d",
"metadata": {},
"outputs": [],
"source": [
"con, cursor = db_helpers.connect_db(database_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e604ed30",
"metadata": {},
"outputs": [],
"source": [
"df_clean = dataset.drop(columns=['subjectID','rowID', 'STUDY', 'LEVEL', 'PHASE'])\n",
"df_first_100 = df_clean.head(200)\n",
"df_first_100 = df_first_100.reset_index(drop=True)\n",
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e77a812e",
"metadata": {},
"outputs": [],
"source": [
"def pandas_to_sqlite_dtype(dtype):\n",
" if pd.api.types.is_integer_dtype(dtype):\n",
" return \"INTEGER\"\n",
" if pd.api.types.is_float_dtype(dtype):\n",
" return \"REAL\"\n",
" if pd.api.types.is_bool_dtype(dtype):\n",
" return \"INTEGER\"\n",
" if pd.api.types.is_datetime64_any_dtype(dtype):\n",
" return \"TEXT\"\n",
" return \"TEXT\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e8897b2",
"metadata": {},
"outputs": [],
"source": [
"columns = {\n",
" col: pandas_to_sqlite_dtype(dtype)\n",
" for col, dtype in df_first_100.dtypes.items()\n",
"}\n",
"\n",
"constraints = {\n",
" \"_Id\": [\"NOT NULL\"]\n",
"}\n",
"\n",
"primary_key = {\n",
" \"pk_df_first_100\": [\"_Id\"]\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ab57624",
"metadata": {},
"outputs": [],
"source": [
"sql = db_helpers.create_table(\n",
" conn=con,\n",
" cursor=cursor,\n",
" table_name=\"rawdata\",\n",
" columns=columns,\n",
" constraints=constraints,\n",
" primary_key=primary_key,\n",
" commit=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25096a7f",
"metadata": {},
"outputs": [],
"source": [
"columns_to_insert = {\n",
" col: df_first_100[col].tolist()\n",
" for col in df_first_100.columns\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a5a3aa8",
"metadata": {},
"outputs": [],
"source": [
"db_helpers.insert_rows_into_table(\n",
" conn=con,\n",
" cursor=cursor,\n",
" table_name=\"rawdata\",\n",
" columns=columns_to_insert,\n",
" commit=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b56beae2",
"metadata": {},
"outputs": [],
"source": [
"a = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4a74a9d",
"metadata": {},
"outputs": [],
"source": [
"a.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da0f8737",
"metadata": {},
"outputs": [],
"source": [
"db_helpers.disconnect_db(con, cursor)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "MSY_FS_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}