diff --git a/EDA.ipynb b/EDA.ipynb index 58e7260..f389458 100644 --- a/EDA.ipynb +++ b/EDA.ipynb @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "file_path = \"adabase-public-0001-v_0_0_2.h5py\"" + "file_path = \"adabase-public-0020-v_0_0_2.h5py\"" ] }, { @@ -51,6 +51,125 @@ "df_signals = pd.read_hdf(file_path, \"SIGNALS\", mode=\"r\")\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7f494d1", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd2f4d84", + "metadata": {}, + "outputs": [], + "source": [ + "settings = df_signals[['STUDY','PHASE','LEVEL']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1699ddc2", + "metadata": {}, + "outputs": [], + "source": [ + "settings.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "a4731c56", + "metadata": {}, + "source": [ + "Action units" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9db0b4b2", + "metadata": {}, + "outputs": [], + "source": [ + "df_signals.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ceccc89", + "metadata": {}, + "outputs": [], + "source": [ + "au_data = df_signals.iloc[:,-20:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d4ee088", + "metadata": {}, + "outputs": [], + "source": [ + "au_data.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d85a8cb", + "metadata": {}, + "outputs": [], + "source": [ + "print(au_data.shape)\n", + "print(au_data.isna().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efff356f", + "metadata": {}, + "outputs": [], + "source": [ + "clean_au_data = au_data.dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42ed1bcd", + "metadata": {}, + "outputs": [], + "source": 
[ + "clean_au_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c7c3f14", + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(len(clean_au_data.columns)):\n", + " print(clean_au_data.iloc[:,i].unique())" + ] + }, + { + "cell_type": "markdown", + "id": "332740a8", + "metadata": {}, + "source": [ + "Plots" + ] + }, { "cell_type": "code", "execution_count": null,