{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fb68b447",
   "metadata": {},
   "source": [
    "## Database creation and filling (for live system)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d70a13f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "# NOTE: machine-specific absolute path (presumably the directory containing db_helpers.py) - adjust to the local checkout\n",
    "sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "import db_helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce696366",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: set paths, table name and number of rows\n",
    "database_path = Path(r\"database.sqlite\")  # this path references an empty, but already created sqlite file\n",
    "parquet_path = Path(r\"...parquet\")  # this path leads to the data that should be used to fill the database\n",
    "table_name = \"XXX\"  # name of the new table\n",
    "n_rows = 200  # number of leading rows of the dataset that are copied into the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1aa9398",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = pd.read_parquet(parquet_path)\n",
    "dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b183746e",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24ed769d",
   "metadata": {},
   "outputs": [],
   "source": [
    "con, cursor = db_helpers.connect_db(database_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7007c68f",
   "metadata": {},
   "source": [
    "Select a subset to insert into the database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e604ed30",
   "metadata": {},
   "outputs": [],
   "source": [
    "# drop the bookkeeping columns that are not stored in the database\n",
    "df_clean = dataset.drop(columns=['subjectID','rowID', 'STUDY', 'LEVEL', 'PHASE'])\n",
    "# NOTE: the name `df_first_100` is kept because later cells reference it;\n",
    "# the frame actually holds the first `n_rows` rows (200 by default), not 100.\n",
    "df_first_100 = df_clean.head(n_rows).reset_index(drop=True)\n",
    "# 1-based running id as the first column\n",
    "df_first_100.insert(0, '_Id', df_first_100.index + 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92171186",
   "metadata": {},
   "source": [
    "Type conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e77a812e",
   "metadata": {},
   "outputs": [],
   "source":
[
    "def pandas_to_sqlite_dtype(dtype):\n",
    "    \"\"\"Return the SQLite column type name for a pandas/numpy dtype.\n",
    "\n",
    "    Integer and boolean dtypes map to \"INTEGER\", floating-point dtypes to\n",
    "    \"REAL\"; datetimes and every other dtype fall back to \"TEXT\".\n",
    "    \"\"\"\n",
    "    if pd.api.types.is_integer_dtype(dtype):\n",
    "        return \"INTEGER\"\n",
    "    if pd.api.types.is_float_dtype(dtype):\n",
    "        return \"REAL\"\n",
    "    if pd.api.types.is_bool_dtype(dtype):\n",
    "        return \"INTEGER\"\n",
    "    if pd.api.types.is_datetime64_any_dtype(dtype):\n",
    "        return \"TEXT\"\n",
    "    return \"TEXT\"\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "45af9956",
   "metadata": {},
   "source": [
    "Define constraints and primary key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e8897b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# column name -> SQLite type, derived from the dtypes of the frame to insert\n",
    "columns = {\n",
    "    col: pandas_to_sqlite_dtype(dtype)\n",
    "    for col, dtype in df_first_100.dtypes.items()\n",
    "}\n",
    "\n",
    "constraints = {\n",
    "    \"_Id\": [\"NOT NULL\"]\n",
    "}\n",
    "\n",
    "primary_key = {\n",
    "    \"pk_df_first_100\": [\"_Id\"]\n",
    "}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "133e92ee",
   "metadata": {},
   "source": [
    "Create the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ab57624",
   "metadata": {},
   "outputs": [],
   "source": [
    "sql = db_helpers.create_table(\n",
    "    conn=con,\n",
    "    cursor=cursor,\n",
    "    table_name=table_name,\n",
    "    columns=columns,\n",
    "    constraints=constraints,\n",
    "    primary_key=primary_key,\n",
    "    commit=True\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25096a7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# column name -> list of plain Python values for the insert helper\n",
    "columns_to_insert = {\n",
    "    col: df_first_100[col].tolist()\n",
    "    for col in df_first_100.columns\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a5a3aa8",
   "metadata": {},
   "outputs": [],
   "source": [
    "db_helpers.insert_rows_into_table(\n",
    "    conn=con,\n",
    "    cursor=cursor,\n",
    "    table_name=table_name,\n",
    "    columns=columns_to_insert,\n",
    "    commit=True\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b56beae2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# read back from the table that was just created and filled (not a hard-coded table name)\n",
    "request = db_helpers.get_data_from_table(conn=con, table_name=table_name, columns_list=['*'])"
   ]
  },
  {
   "cell_type": "code",
"execution_count": null, "id": "a4a74a9d", "metadata": {}, "outputs": [], "source": [ "request.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "da0f8737", "metadata": {}, "outputs": [], "source": [ "db_helpers.disconnect_db(con, cursor)" ] } ], "metadata": { "kernelspec": { "display_name": "310", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 5 }