From 10fdafa2444d89855634045385dda74c23135884 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 10 Mar 2026 19:17:08 +0100 Subject: [PATCH] added prediction env --- predict_pipeline/prediction_env.yaml | 196 +++++++++++++++++++++++++++ project_report.md | 48 +++---- readme.md | 6 +- 3 files changed, 223 insertions(+), 27 deletions(-) create mode 100644 predict_pipeline/prediction_env.yaml diff --git a/predict_pipeline/prediction_env.yaml b/predict_pipeline/prediction_env.yaml new file mode 100644 index 0000000..2f28932 --- /dev/null +++ b/predict_pipeline/prediction_env.yaml @@ -0,0 +1,196 @@ +name: 'prediction_env' +channels: + - defaults + - conda-forge +dependencies: + - _py-xgboost-mutex=2.0=cpu_2 + - absl-py=2.3.1=py310haa95532_0 + - aom=3.12.1=h00a0c3c_0 + - arrow-cpp=21.0.0=hcdc3a1c_2 + - asttokens=3.0.1=pyhd8ed1ab_0 + - astunparse=1.6.3=py_0 + - aws-c-auth=0.9.0=h02ab6af_2 + - aws-c-cal=0.9.2=h02ab6af_1 + - aws-c-common=0.12.4=h02ab6af_0 + - aws-c-compression=0.3.1=h02ab6af_2 + - aws-c-event-stream=0.5.6=h02ab6af_0 + - aws-c-http=0.10.4=h02ab6af_0 + - aws-c-io=0.21.4=h02ab6af_0 + - aws-c-mqtt=0.13.3=h02ab6af_0 + - aws-c-s3=0.8.7=h02ab6af_0 + - aws-c-sdkutils=0.2.4=h02ab6af_1 + - aws-checksums=0.2.7=h02ab6af_1 + - aws-crt-cpp=0.34.0=h885b0b7_0 + - aws-sdk-cpp=1.11.638=hf0af688_0 + - blas=1.0=mkl + - brotlicffi=1.2.0.0=py310h885b0b7_0 + - bzip2=1.0.8=h2bbff1b_6 + - c-ares=1.34.6=h2c209ce_0 + - ca-certificates=2026.1.4=h4c7d964_0 + - cairo=1.18.4=he9e932c_0 + - certifi=2026.01.04=py310haa95532_0 + - cffi=2.0.0=py310h02ab6af_1 + - charset-normalizer=3.4.4=py310haa95532_0 + - colorama=0.4.6=pyhd8ed1ab_1 + - comm=0.2.3=pyhe01879c_0 + - dav1d=1.2.1=h2bbff1b_0 + - debugpy=1.8.20=py310h699e580_0 + - decorator=5.2.1=pyhd8ed1ab_0 + - exceptiongroup=1.3.1=pyhd8ed1ab_0 + - executing=2.2.1=pyhd8ed1ab_0 + - expat=2.7.4=hd7fb8db_0 + - flatbuffers=24.3.25=h21716d4_0 + - fontconfig=2.15.0=hd211d86_0 + - freeglut=3.8.0=hfcef157_0 + - freetype=2.14.1=hfbffc0b_0 + - 
fribidi=1.0.16=haf45083_0 + - gast=0.7.0=pyhd3eb1b0_0 + - gflags=2.2.2=hd77b12b_1 + - giflib=5.2.2=h7edc060_0 + - glog=0.5.0=hd77b12b_1 + - google-pasta=0.2.0=pyhd3eb1b0_0 + - graphite2=1.3.14=hd77b12b_1 + - grpcio=1.74.1=py310h5c751cc_0 + - h5py=3.15.1=py310he283ef2_1 + - harfbuzz=12.3.0=h3ef6528_1 + - hdf5=1.14.5=ha36df97_2 + - icc_rt=2022.1.0=h6049295_2 + - icu=73.1=h6c2663c_0 + - idna=3.11=py310haa95532_0 + - intel-openmp=2025.0.0=haa95532_1164 + - ipykernel=7.2.0=pyh6dadd2b_1 + - ipython=8.37.0=pyha7b4d00_0 + - jedi=0.19.2=pyhd8ed1ab_1 + - joblib=1.5.3=py310haa95532_0 + - jpeg=9f=ha349fce_0 + - jupyter_client=8.8.0=pyhcf101f3_0 + - jupyter_core=5.9.1=pyh6dadd2b_0 + - keras=3.11.2=py310h51baaa3_0 + - krb5=1.21.3=hdf4eb48_0 + - lcms2=2.17=h3732fa5_0 + - lerc=4.0.0=h5da7b33_0 + - libabseil=20250814.1=cxx17_hcd311fc_0 + - libavif=1.3.0=h5bd13ec_0 + - libbrotlicommon=1.2.0=h907acca_0 + - libbrotlidec=1.2.0=h02c67a5_0 + - libbrotlienc=1.2.0=h483e6b9_0 + - libcurl=8.17.0=h6e672f4_1 + - libdeflate=1.22=h5bf469e_0 + - libexpat=2.7.4=hd7fb8db_0 + - libffi=3.4.4=hd77b12b_1 + - libglib=2.86.3=h9bccc14_0 + - libgrpc=1.74.1=hde67744_0 + - libhwloc=2.12.1=default_hfa10c62_1000 + - libiconv=1.16=h2bbff1b_3 + - libkrb5=1.22.1=hb237eb7_0 + - libopenjpeg=2.5.4=h02ab6af_1 + - libpng=1.6.54=ha15c746_0 + - libprotobuf=6.33.0=h2a56892_1 + - libre2-11=2025.11.05=ha6b10e7_0 + - libsodium=1.0.20=hc70643c_0 + - libssh2=1.11.1=h2addb87_0 + - libthrift=0.22.0=ha2884a9_0 + - libtiff=4.7.1=h3a18249_0 + - libwebp-base=1.6.0=hbf3958f_0 + - libxgboost=3.1.2=h585ebfc_0 + - libxml2=2.13.9=h6201b9f_0 + - libzlib=1.3.1=h02ab6af_0 + - lz4-c=1.9.4=h2bbff1b_1 + - m2w64-gcc-libgfortran=5.3.0=6 + - m2w64-gcc-libs=5.3.0=7 + - m2w64-gcc-libs-core=5.3.0=7 + - m2w64-gmp=6.1.0=2 + - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 + - markdown=3.10=py310haa95532_0 + - markdown-it-py=2.2.0=py310haa95532_1 + - markupsafe=3.0.2=py310h827c3e9_0 + - matplotlib-inline=0.2.1=pyhd8ed1ab_0 + - 
mdurl=0.1.2=py310haa95532_0 + - mkl=2025.0.0=h5da7b33_930 + - mkl-service=2.5.2=py310h0b37514_0 + - mkl_fft=2.1.1=py310h300f80d_0 + - mkl_random=1.3.0=py310ha5e6156_0 + - ml_dtypes=0.5.4=py310h42c1672_0 + - mpi=1.0=msmpi + - mpi4py=4.0.3=py310h02ab6af_1 + - msmpi=10.1.1=had4844c_0 + - msys2-conda-epoch=20160418=1 + - namex=0.1.0=py310haa95532_0 + - nest-asyncio=1.6.0=pyhd8ed1ab_1 + - numpy-base=2.1.3=py310he4e2855_3 + - openssl=3.6.1=hf411b9b_1 + - opt_einsum=3.3.0=pyhd3eb1b0_1 + - optree=0.18.0=py310h03f52e7_0 + - orc=2.2.0=h79e1e1e_1 + - packaging=25.0=py310haa95532_1 + - paho-mqtt=2.1.0=pyhe01879c_1 + - parso=0.8.6=pyhcf101f3_0 + - pcre2=10.46=h5740b90_0 + - pickleshare=0.7.5=pyhd8ed1ab_1004 + - pillow=12.1.0=py310h6b7a805_0 + - pip=26.0.1=pyhc872135_0 + - pixman=0.46.4=h4043f72_0 + - platformdirs=4.9.2=pyhcf101f3_0 + - prompt-toolkit=3.0.52=pyha770c72_0 + - protobuf=6.33.0=py310ha4c6e68_0 + - psutil=7.2.2=py310h1637853_0 + - pure_eval=0.2.3=pyhd8ed1ab_1 + - py-xgboost=3.1.2=py310haa95532_0 + - pyarrow=21.0.0=py310h42c1672_1 + - pycparser=2.23=py310haa95532_0 + - pygments=2.19.2=pyhd8ed1ab_0 + - pysocks=1.7.1=py310haa95532_1 + - python=3.10.19=h981015d_0 + - python-dateutil=2.9.0.post0=pyhe01879c_2 + - python-flatbuffers=24.3.25=py310haa95532_0 + - python_abi=3.10=2_cp310 + - pywin32=311=py310h282bd7d_1 + - pyyaml=6.0.3=py310hb9a58be_0 + - pyzmq=27.1.0=py310h535538e_0 + - re2=2025.11.05=hc24cdf5_0 + - requests=2.32.5=py310haa95532_1 + - rich=14.2.0=py310haa95532_0 + - scipy=1.15.3=py310h1bbe36f_1 + - setuptools=80.10.2=py310haa95532_0 + - six=1.17.0=pyhe01879c_1 + - snappy=1.2.2=hab6b7b3_1 + - sqlite=3.51.1=hda9a48d_0 + - stack_data=0.6.3=pyhd8ed1ab_1 + - tbb=2022.3.0=h90c84d6_0 + - tbb-devel=2022.3.0=h90c84d6_0 + - tensorboard=2.20.0=py310haa95532_0 + - tensorboard-data-server=0.7.0=py310haa95532_1 + - tensorflow=2.20.0=cpu_py310h6605a60_0 + - tensorflow-base=2.20.0=cpu_py310hce87ebc_0 + - termcolor=3.2.0=py310haa95532_0 + - threadpoolctl=3.5.0=py310h4442805_1 
+ - tk=8.6.15=hf199647_0 + - tornado=6.5.4=py310h29418f3_0 + - traitlets=5.14.3=pyhd8ed1ab_1 + - typing-extensions=4.15.0=py310haa95532_0 + - typing_extensions=4.15.0=py310haa95532_0 + - ucrt=10.0.22621.0=haa95532_0 + - urllib3=2.6.3=py310haa95532_0 + - utf8proc=2.6.1=h2bbff1b_1 + - vc=14.42=haa95532_5 + - vc14_runtime=14.44.35208=h4927774_10 + - vs2015_runtime=14.44.35208=ha6b5a95_10 + - wcwidth=0.6.0=pyhd8ed1ab_0 + - werkzeug=3.1.3=py310haa95532_0 + - wheel=0.46.3=py310haa95532_0 + - win_inet_pton=1.1.0=py310haa95532_1 + - wrapt=2.0.1=py310h02ab6af_0 + - xgboost=3.1.2=py310haa95532_0 + - xz=5.6.4=h4754444_1 + - yaml=0.2.5=he774522_0 + - zeromq=4.3.5=h5bddc39_9 + - zlib=1.3.1=h02ab6af_0 + - zstd=1.5.7=h56299aa_0 + - pip: + - numpy==1.24.4 + - pandas==2.3.0 + - pyocclient==0.6 + - pytz==2025.2 + - scikit-learn==1.6.1 + - tzdata==2025.3 +prefix: C:\Users\micha\anaconda3\envs\310 diff --git a/project_report.md b/project_report.md index 4856f6c..4b35b3e 100644 --- a/project_report.md +++ b/project_report.md @@ -97,7 +97,7 @@ Included model families: Supporting utilities in ```model_training/tools```: - `scaler.py`: Functions to fit, transform, save and load either MinMaxScaler or StandardScaler, subject-wise and globally - for new subjects, a fallback scaler (using mean of all subjects scaling parameters) is used -- `performance_split.py`: Provides a function to split a group of subjects based on their performance in the AdaBase experiments, based on the results created in `researchOnSubjectPerformance.ipynb` +- `performance_split.py`: Provides a function to split a group of subjects based on their performance in the AdaBase experiments, based on the results created in `researchOnSubjectPerformance.ipynb`. 
To split into three groups for train, validation & test, call the function twice - `mad_outlier_removal.py`: Functions to fit and transform data with MAD outlier removal - `evaluation_tools.py`: Especially used for Isolation Forest, Functions for ROC curve as well as confusion matrix @@ -115,18 +115,18 @@ Main script: Pipeline: - Loads runtime config (`predict_pipeline/config.yaml`) -- Pulls latest row from SQLite (`database.path/table/key`) -- Replaces missing values using `fallback` map -- Optionally applies scaler (`.pkl`/`.joblib`) +- Pulls latest row from SQLite +- Replaces missing values using `fallback` map from config file - if more than 50% of values need to be replaced, the sample is dropped and "valid=False" +- Optionally applies scaler (`.pkl`/`.joblib`) - set via config file - Loads model (`.keras`, `.pkl`, `.joblib`) and predicts - Publishes JSON payload to MQTT topic Expected payload form: ```json { - "valid": true, - "_id": 123, - "prediction": 0 + "valid": true, # false only if too many signals are invalid + "_id": 123, # this is the sample ID from the database + "prediction": 0 # 0 for low load, 1 for high load } ``` @@ -135,11 +135,12 @@ Expected payload form: Files: - `predict_pipeline/predict.service` - `predict_pipeline/predict.timer` -- `predict_pipeline/predict_service_timer_documentation.md` Role: - Run inference repeatedly without manual execution -- Timer/service configuration can be customized per target machine +- Timer/service configuration can be customized + +More information on how to use and interact with the system service and timer can be found in `predict_service_timer_documentation.md` ## 5.2 Runtime Configuration @@ -155,23 +156,26 @@ Sections: - `fallback`: default values for NaN replacement Important: -- The repository currently uses environment-specific absolute paths in some scripts/configs. -- Paths should be normalized before deployment to a new machine. 
+- The repository currently uses environment-specific absolute paths in some scripts/configs to ensure functionality on Ohm-UX driving simulator. -## 5.3) Data and Feature Expectations + +## 5.3 Data and Feature Expectations Prediction expects SQLite rows containing: - `_Id` -- `start_time` +- `start_time` - this is not yet used for either predictions or messages - All configured model features (AUs + eye metrics) -Common feature groups: +Common feature groups (similar to own dataset): - `FACE_AUxx_mean` columns - Fixation counters and duration statistics - Saccade count/amplitude/duration statistics - Blink count/duration statistics - Pupil mean and IPA +## 5.4 Create database from scratch +To (re-)create the custom database for deployment, use `fill_db.ipynb`. Enter the path to your dataset, drop unnecessary columns and insert a subset of data with tool functions from `tools/db_helpers`. + ## 6) Installation and Dependencies Due to unsolvable dependency conflicts, several environemnts need to be used in the same time. ### 6.1 Environemnt for camera handling TO DO ### 6.2 Environment for predictions -Install base requirements: -```bash -pip install -r requirements.txt -``` - -Typical key packages in this project: -- `numpy`, `pandas`, `scikit-learn`, `scipy`, `pyarrow`, `pyyaml`, `joblib` -- `opencv-python`, `mediapipe`, `torch`, `py-feat`, `pygazeanalyser` -- `paho-mqtt` -- optional data access stack (`pyocclient`, `h5py`, `tables`) +If you want to use the existing deployment on the Ohm-UX driving simulator's Jetson board, activate conda environment `p310_FS_TF`, a Python 3.10 environment including TensorFlow and all other packages required to run `predict_sample.py`. +Otherwise, as described in `readme.md: Setup`, you can use `prediction_env.yaml` to create a new environment that fulfills the requirements. 
## 7) Repository File Inventory @@ -201,10 +197,10 @@ Typical key packages in this project: ### Dataset Creation -- `dataset_creation/parquet_file_creation.py` - local source to parquet conversion +- `dataset_creation/parquet_file_creation.py` - local files to parquet conversion - `dataset_creation/create_parquet_files_from_owncloud.py` - ownCloud download + parquet conversion - `dataset_creation/combined_feature_creation.py` - sliding-window multimodal feature generation -- `dataset_creation/maxDist.py` - helper/statistical utility script +- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation #### AU Creation - `dataset_creation/AU_creation/AU_creation_service.py` - AU extraction service workflow diff --git a/readme.md b/readme.md index 7c2c6a7..e10263d 100644 --- a/readme.md +++ b/readme.md @@ -11,11 +11,15 @@ Activate the conda-repository "". ```bash conda activate ``` -Make sure, another environment that fulfills requirement.txt is available, matching with predict_pipeline/predict.service - See `predict_pipeline/predict_service_timer_documentation.md` +Make sure another environment that fulfills prediction_env.yaml is available, matching with predict_pipeline/predict.service - See `predict_pipeline/predict_service_timer_documentation.md` To get an overview over all available conda environments on your device, use this command in anaconda prompt terminal: ```bash conda info --envs ``` +Create a new environment based on the YAML file: +```bash + conda env create -f prediction_env.yaml +``` ### 2) Camera AU + Eye Pipeline (`camera_stream_AU_and_ET_new.py`)