{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d70a13f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The project-local `db_helpers` module is imported from TOOLS_DIR, which is\n",
    "# therefore put on sys.path before the import.\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "TOOLS_DIR = '/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools'\n",
    "# Guarded so that re-running this cell does not append duplicate entries.\n",
    "if TOOLS_DIR not in sys.path:\n",
    "    sys.path.append(TOOLS_DIR)\n",
    "\n",
    "import db_helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce696366",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: file locations and the values a reader may want to tune.\n",
    "database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
    "parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")\n",
    "\n",
    "TABLE_NAME = \"rawdata\"  # table that is created, filled and read back below\n",
    "N_ROWS = 200  # number of leading dataset rows copied into the table\n",
    "DROPPED_COLUMNS = ['subjectID', 'rowID', 'STUDY', 'LEVEL', 'PHASE']  # not written to the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1aa9398",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the parquet file and preview its first rows.\n",
    "dataset = pd.read_parquet(parquet_path)\n",
    "dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b183746e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column dtypes; the SQLite column types further down are derived from dtypes like these.\n",
    "dataset.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24ed769d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The connection and cursor opened here are released by disconnect_db in the last cell.\n",
    "con, cursor = db_helpers.connect_db(database_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e604ed30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the columns that are not exported and keep only the first N_ROWS rows.\n",
    "# The frame is rebuilt from `dataset` on every run, so this cell can be re-run safely.\n",
    "rawdata_subset = (\n",
    "    dataset\n",
    "    .drop(columns=DROPPED_COLUMNS)\n",
    "    .head(N_ROWS)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "# `_Id` is a 1-based running row number; it is used as the primary key column below.\n",
    "rawdata_subset.insert(0, '_Id', rawdata_subset.index + 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e77a812e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pandas_to_sqlite_dtype(dtype):\n",
    "    \"\"\"Map a pandas/NumPy dtype to the name of a SQLite column type.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dtype\n",
    "        Any dtype accepted by the ``pd.api.types.is_*_dtype`` checks.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        ``\"INTEGER\"`` for integer and boolean dtypes, ``\"REAL\"`` for float\n",
    "        dtypes and ``\"TEXT\"`` for everything else, datetimes included.\n",
    "    \"\"\"\n",
    "    if pd.api.types.is_integer_dtype(dtype) or pd.api.types.is_bool_dtype(dtype):\n",
    "        return \"INTEGER\"\n",
    "    if pd.api.types.is_float_dtype(dtype):\n",
    "        return \"REAL\"\n",
    "    # Datetimes, strings, objects and any remaining dtype fall back to text.\n",
    "    return \"TEXT\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e8897b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Table definition passed to db_helpers.create_table: column name -> SQLite type.\n",
    "columns = {\n",
    "    col: pandas_to_sqlite_dtype(dtype)\n",
    "    for col, dtype in rawdata_subset.dtypes.items()\n",
    "}\n",
    "\n",
    "constraints = {\n",
    "    \"_Id\": [\"NOT NULL\"]\n",
    "}\n",
    "\n",
    "# This key string is deliberately left as it always was so the created schema does not change.\n",
    "primary_key = {\n",
    "    \"pk_df_first_100\": [\"_Id\"]\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ab57624",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `sql` presumably holds the generated CREATE TABLE statement - confirm in db_helpers.\n",
    "sql = db_helpers.create_table(\n",
    "    conn=con,\n",
    "    cursor=cursor,\n",
    "    table_name=TABLE_NAME,\n",
    "    columns=columns,\n",
    "    constraints=constraints,\n",
    "    primary_key=primary_key,\n",
    "    commit=True\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25096a7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column-oriented payload: column name -> list of that column's values.\n",
    "columns_to_insert = {\n",
    "    col: rawdata_subset[col].tolist()\n",
    "    for col in rawdata_subset.columns\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a5a3aa8",
   "metadata": {},
   "outputs": [],
   "source": [
    "db_helpers.insert_rows_into_table(\n",
    "    conn=con,\n",
    "    cursor=cursor,\n",
    "    table_name=TABLE_NAME,\n",
    "    columns=columns_to_insert,\n",
    "    commit=True\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b56beae2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the table back to check what was written.\n",
    "rawdata_from_db = db_helpers.get_data_from_table(conn=con, table_name=TABLE_NAME, columns_list=['*'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4a74a9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "rawdata_from_db.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da0f8737",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Release the cursor and connection opened earlier in the notebook.\n",
    "db_helpers.disconnect_db(con, cursor)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "MSY_FS_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}