From 10fdafa2444d89855634045385dda74c23135884 Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 10 Mar 2026 19:17:08 +0100 Subject: [PATCH] added prediction env --- predict_pipeline/prediction_env.yaml | 196 +++++++++++++++++++++++++++ project_report.md | 48 +++---- readme.md | 6 +- 3 files changed, 223 insertions(+), 27 deletions(-) create mode 100644 predict_pipeline/prediction_env.yaml diff --git a/predict_pipeline/prediction_env.yaml b/predict_pipeline/prediction_env.yaml new file mode 100644 index 0000000..2f28932 --- /dev/null +++ b/predict_pipeline/prediction_env.yaml @@ -0,0 +1,196 @@ +name: 'prediction_env' +channels: + - defaults + - conda-forge +dependencies: + - _py-xgboost-mutex=2.0=cpu_2 + - absl-py=2.3.1=py310haa95532_0 + - aom=3.12.1=h00a0c3c_0 + - arrow-cpp=21.0.0=hcdc3a1c_2 + - asttokens=3.0.1=pyhd8ed1ab_0 + - astunparse=1.6.3=py_0 + - aws-c-auth=0.9.0=h02ab6af_2 + - aws-c-cal=0.9.2=h02ab6af_1 + - aws-c-common=0.12.4=h02ab6af_0 + - aws-c-compression=0.3.1=h02ab6af_2 + - aws-c-event-stream=0.5.6=h02ab6af_0 + - aws-c-http=0.10.4=h02ab6af_0 + - aws-c-io=0.21.4=h02ab6af_0 + - aws-c-mqtt=0.13.3=h02ab6af_0 + - aws-c-s3=0.8.7=h02ab6af_0 + - aws-c-sdkutils=0.2.4=h02ab6af_1 + - aws-checksums=0.2.7=h02ab6af_1 + - aws-crt-cpp=0.34.0=h885b0b7_0 + - aws-sdk-cpp=1.11.638=hf0af688_0 + - blas=1.0=mkl + - brotlicffi=1.2.0.0=py310h885b0b7_0 + - bzip2=1.0.8=h2bbff1b_6 + - c-ares=1.34.6=h2c209ce_0 + - ca-certificates=2026.1.4=h4c7d964_0 + - cairo=1.18.4=he9e932c_0 + - certifi=2026.01.04=py310haa95532_0 + - cffi=2.0.0=py310h02ab6af_1 + - charset-normalizer=3.4.4=py310haa95532_0 + - colorama=0.4.6=pyhd8ed1ab_1 + - comm=0.2.3=pyhe01879c_0 + - dav1d=1.2.1=h2bbff1b_0 + - debugpy=1.8.20=py310h699e580_0 + - decorator=5.2.1=pyhd8ed1ab_0 + - exceptiongroup=1.3.1=pyhd8ed1ab_0 + - executing=2.2.1=pyhd8ed1ab_0 + - expat=2.7.4=hd7fb8db_0 + - flatbuffers=24.3.25=h21716d4_0 + - fontconfig=2.15.0=hd211d86_0 + - freeglut=3.8.0=hfcef157_0 + - freetype=2.14.1=hfbffc0b_0 + - 
fribidi=1.0.16=haf45083_0 + - gast=0.7.0=pyhd3eb1b0_0 + - gflags=2.2.2=hd77b12b_1 + - giflib=5.2.2=h7edc060_0 + - glog=0.5.0=hd77b12b_1 + - google-pasta=0.2.0=pyhd3eb1b0_0 + - graphite2=1.3.14=hd77b12b_1 + - grpcio=1.74.1=py310h5c751cc_0 + - h5py=3.15.1=py310he283ef2_1 + - harfbuzz=12.3.0=h3ef6528_1 + - hdf5=1.14.5=ha36df97_2 + - icc_rt=2022.1.0=h6049295_2 + - icu=73.1=h6c2663c_0 + - idna=3.11=py310haa95532_0 + - intel-openmp=2025.0.0=haa95532_1164 + - ipykernel=7.2.0=pyh6dadd2b_1 + - ipython=8.37.0=pyha7b4d00_0 + - jedi=0.19.2=pyhd8ed1ab_1 + - joblib=1.5.3=py310haa95532_0 + - jpeg=9f=ha349fce_0 + - jupyter_client=8.8.0=pyhcf101f3_0 + - jupyter_core=5.9.1=pyh6dadd2b_0 + - keras=3.11.2=py310h51baaa3_0 + - krb5=1.21.3=hdf4eb48_0 + - lcms2=2.17=h3732fa5_0 + - lerc=4.0.0=h5da7b33_0 + - libabseil=20250814.1=cxx17_hcd311fc_0 + - libavif=1.3.0=h5bd13ec_0 + - libbrotlicommon=1.2.0=h907acca_0 + - libbrotlidec=1.2.0=h02c67a5_0 + - libbrotlienc=1.2.0=h483e6b9_0 + - libcurl=8.17.0=h6e672f4_1 + - libdeflate=1.22=h5bf469e_0 + - libexpat=2.7.4=hd7fb8db_0 + - libffi=3.4.4=hd77b12b_1 + - libglib=2.86.3=h9bccc14_0 + - libgrpc=1.74.1=hde67744_0 + - libhwloc=2.12.1=default_hfa10c62_1000 + - libiconv=1.16=h2bbff1b_3 + - libkrb5=1.22.1=hb237eb7_0 + - libopenjpeg=2.5.4=h02ab6af_1 + - libpng=1.6.54=ha15c746_0 + - libprotobuf=6.33.0=h2a56892_1 + - libre2-11=2025.11.05=ha6b10e7_0 + - libsodium=1.0.20=hc70643c_0 + - libssh2=1.11.1=h2addb87_0 + - libthrift=0.22.0=ha2884a9_0 + - libtiff=4.7.1=h3a18249_0 + - libwebp-base=1.6.0=hbf3958f_0 + - libxgboost=3.1.2=h585ebfc_0 + - libxml2=2.13.9=h6201b9f_0 + - libzlib=1.3.1=h02ab6af_0 + - lz4-c=1.9.4=h2bbff1b_1 + - m2w64-gcc-libgfortran=5.3.0=6 + - m2w64-gcc-libs=5.3.0=7 + - m2w64-gcc-libs-core=5.3.0=7 + - m2w64-gmp=6.1.0=2 + - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 + - markdown=3.10=py310haa95532_0 + - markdown-it-py=2.2.0=py310haa95532_1 + - markupsafe=3.0.2=py310h827c3e9_0 + - matplotlib-inline=0.2.1=pyhd8ed1ab_0 + - 
mdurl=0.1.2=py310haa95532_0 + - mkl=2025.0.0=h5da7b33_930 + - mkl-service=2.5.2=py310h0b37514_0 + - mkl_fft=2.1.1=py310h300f80d_0 + - mkl_random=1.3.0=py310ha5e6156_0 + - ml_dtypes=0.5.4=py310h42c1672_0 + - mpi=1.0=msmpi + - mpi4py=4.0.3=py310h02ab6af_1 + - msmpi=10.1.1=had4844c_0 + - msys2-conda-epoch=20160418=1 + - namex=0.1.0=py310haa95532_0 + - nest-asyncio=1.6.0=pyhd8ed1ab_1 + - numpy-base=2.1.3=py310he4e2855_3 + - openssl=3.6.1=hf411b9b_1 + - opt_einsum=3.3.0=pyhd3eb1b0_1 + - optree=0.18.0=py310h03f52e7_0 + - orc=2.2.0=h79e1e1e_1 + - packaging=25.0=py310haa95532_1 + - paho-mqtt=2.1.0=pyhe01879c_1 + - parso=0.8.6=pyhcf101f3_0 + - pcre2=10.46=h5740b90_0 + - pickleshare=0.7.5=pyhd8ed1ab_1004 + - pillow=12.1.0=py310h6b7a805_0 + - pip=26.0.1=pyhc872135_0 + - pixman=0.46.4=h4043f72_0 + - platformdirs=4.9.2=pyhcf101f3_0 + - prompt-toolkit=3.0.52=pyha770c72_0 + - protobuf=6.33.0=py310ha4c6e68_0 + - psutil=7.2.2=py310h1637853_0 + - pure_eval=0.2.3=pyhd8ed1ab_1 + - py-xgboost=3.1.2=py310haa95532_0 + - pyarrow=21.0.0=py310h42c1672_1 + - pycparser=2.23=py310haa95532_0 + - pygments=2.19.2=pyhd8ed1ab_0 + - pysocks=1.7.1=py310haa95532_1 + - python=3.10.19=h981015d_0 + - python-dateutil=2.9.0.post0=pyhe01879c_2 + - python-flatbuffers=24.3.25=py310haa95532_0 + - python_abi=3.10=2_cp310 + - pywin32=311=py310h282bd7d_1 + - pyyaml=6.0.3=py310hb9a58be_0 + - pyzmq=27.1.0=py310h535538e_0 + - re2=2025.11.05=hc24cdf5_0 + - requests=2.32.5=py310haa95532_1 + - rich=14.2.0=py310haa95532_0 + - scipy=1.15.3=py310h1bbe36f_1 + - setuptools=80.10.2=py310haa95532_0 + - six=1.17.0=pyhe01879c_1 + - snappy=1.2.2=hab6b7b3_1 + - sqlite=3.51.1=hda9a48d_0 + - stack_data=0.6.3=pyhd8ed1ab_1 + - tbb=2022.3.0=h90c84d6_0 + - tbb-devel=2022.3.0=h90c84d6_0 + - tensorboard=2.20.0=py310haa95532_0 + - tensorboard-data-server=0.7.0=py310haa95532_1 + - tensorflow=2.20.0=cpu_py310h6605a60_0 + - tensorflow-base=2.20.0=cpu_py310hce87ebc_0 + - termcolor=3.2.0=py310haa95532_0 + - threadpoolctl=3.5.0=py310h4442805_1 
+ - tk=8.6.15=hf199647_0 + - tornado=6.5.4=py310h29418f3_0 + - traitlets=5.14.3=pyhd8ed1ab_1 + - typing-extensions=4.15.0=py310haa95532_0 + - typing_extensions=4.15.0=py310haa95532_0 + - ucrt=10.0.22621.0=haa95532_0 + - urllib3=2.6.3=py310haa95532_0 + - utf8proc=2.6.1=h2bbff1b_1 + - vc=14.42=haa95532_5 + - vc14_runtime=14.44.35208=h4927774_10 + - vs2015_runtime=14.44.35208=ha6b5a95_10 + - wcwidth=0.6.0=pyhd8ed1ab_0 + - werkzeug=3.1.3=py310haa95532_0 + - wheel=0.46.3=py310haa95532_0 + - win_inet_pton=1.1.0=py310haa95532_1 + - wrapt=2.0.1=py310h02ab6af_0 + - xgboost=3.1.2=py310haa95532_0 + - xz=5.6.4=h4754444_1 + - yaml=0.2.5=he774522_0 + - zeromq=4.3.5=h5bddc39_9 + - zlib=1.3.1=h02ab6af_0 + - zstd=1.5.7=h56299aa_0 + - pip: + - numpy==1.24.4 + - pandas==2.3.0 + - pyocclient==0.6 + - pytz==2025.2 + - scikit-learn==1.6.1 + - tzdata==2025.3 +prefix: C:\Users\micha\anaconda3\envs\310 diff --git a/project_report.md b/project_report.md index 4856f6c..4b35b3e 100644 --- a/project_report.md +++ b/project_report.md @@ -97,7 +97,7 @@ Included model families: Supporting utilities in ```model_training/tools```: - `scaler.py`: Functions to fit, transform, save and load either MinMaxScaler or StandardScaler, subject-wise and globally - for new subjects, a fallback scaler (using mean of all subjects scaling parameters) is used -- `performance_split.py`: Provides a function to split a group of subjects based on their performance in the AdaBase experiments, based on the results created in `researchOnSubjectPerformance.ipynb` +- `performance_split.py`: Provides a function to split a group of subjects based on their performance in the AdaBase experiments, based on the results created in `researchOnSubjectPerformance.ipynb`. 
To split into three groups for train, validation & test, call the function twice - `mad_outlier_removal.py`: Functions to fit and transform data with MAD outlier removal - `evaluation_tools.py`: Especially used for Isolation Forest, Functions for ROC curve as well as confusion matrix @@ -115,18 +115,18 @@ Main script: Pipeline: - Loads runtime config (`predict_pipeline/config.yaml`) -- Pulls latest row from SQLite (`database.path/table/key`) -- Replaces missing values using `fallback` map -- Optionally applies scaler (`.pkl`/`.joblib`) +- Pulls latest row from SQLite +- Replaces missing values using `fallback` map from config file - if more than 50% of values need to be replaced, the sample is dropped and "valid=False" +- Optionally applies scaler (`.pkl`/`.joblib`) - set via config file - Loads model (`.keras`, `.pkl`, `.joblib`) and predicts - Publishes JSON payload to MQTT topic Expected payload form: ```json { - "valid": true, - "_id": 123, - "prediction": 0 + "valid": true, # false only if too many signals are invalid + "_id": 123, # this is the sample ID from the database + "prediction": 0 # 0 for low load, 1 for high load } ``` @@ -135,11 +135,12 @@ Expected payload form: Files: - `predict_pipeline/predict.service` - `predict_pipeline/predict.timer` -- `predict_pipeline/predict_service_timer_documentation.md` Role: - Run inference repeatedly without manual execution -- Timer/service configuration can be customized per target machine +- Timer/service configuration can be customized + +More information on how to use and interact with the system service and timer can be found in `predict_service_timer_documentation.md` ## 5.2 Runtime Configuration @@ -155,23 +156,26 @@ Sections: - `fallback`: default values for NaN replacement Important: -- The repository currently uses environment-specific absolute paths in some scripts/configs. -- Paths should be normalized before deployment to a new machine. 
+- The repository currently uses environment-specific absolute paths in some scripts/configs to ensure functionality on Ohm-UX driving simulator. -## 5.3) Data and Feature Expectations + +## 5.3 Data and Feature Expectations Prediction expects SQLite rows containing: - `_Id` -- `start_time` +- `start_time` - this is not yet used for either predictions or messages - All configured model features (AUs + eye metrics) -Common feature groups: +Common feature groups (similar to own dataset): - `FACE_AUxx_mean` columns - Fixation counters and duration statistics - Saccade count/amplitude/duration statistics - Blink count/duration statistics - Pupil mean and IPA +## 5.4 Create database from scratch +To (re-)create the custom database for deployment, use `fill_db.ipynb`. Enter the path to your dataset, drop unnecessary columns and insert a subset of data with tool functions from `tools/db_helpers`. + ## 6) Installation and Dependencies Due to unsolvable dependency conflicts, several environemnts need to be used in the same time. ### 6.1 Environemnt for camera handling TO DO ### 6.2 Environment for predictions -Install base requirements: -```bash -pip install -r requirements.txt -``` - -Typical key packages in this project: -- `numpy`, `pandas`, `scikit-learn`, `scipy`, `pyarrow`, `pyyaml`, `joblib` -- `opencv-python`, `mediapipe`, `torch`, `py-feat`, `pygazeanalyser` -- `paho-mqtt` -- optional data access stack (`pyocclient`, `h5py`, `tables`) +If you want to use the existing deployment on the Ohm-UX driving simulator's Jetson board, activate conda environment `p310_FS_TF`, a Python 3.10 environment including TensorFlow and all other packages required to run `predict_sample.py`. +Otherwise, as described in `readme.md: Setup`, you can use `prediction_env.yaml` to create a new environment that fulfills the requirements. 
## 7) Repository File Inventory @@ -201,10 +197,10 @@ Typical key packages in this project: ### Dataset Creation -- `dataset_creation/parquet_file_creation.py` - local source to parquet conversion +- `dataset_creation/parquet_file_creation.py` - local files to parquet conversion - `dataset_creation/create_parquet_files_from_owncloud.py` - ownCloud download + parquet conversion - `dataset_creation/combined_feature_creation.py` - sliding-window multimodal feature generation -- `dataset_creation/maxDist.py` - helper/statistical utility script +- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation #### AU Creation - `dataset_creation/AU_creation/AU_creation_service.py` - AU extraction service workflow diff --git a/readme.md b/readme.md index 7c2c6a7..e10263d 100644 --- a/readme.md +++ b/readme.md @@ -11,11 +11,15 @@ Activate the conda-repository "". ```bash conda activate ``` -Make sure, another environment that fulfills requirement.txt is available, matching with predict_pipeline/predict.service - See `predict_pipeline/predict_service_timer_documentation.md` +Make sure another environment that fulfills prediction_env.yaml is available, matching with predict_pipeline/predict.service - See `predict_pipeline/predict_service_timer_documentation.md` To get an overview over all available conda environments on your device, use this command in anaconda prompt terminal: ```bash conda info --envs ``` +Create a new environment based on the YAML file: +```bash + conda env create -f prediction_env.yaml +``` ### 2) Camera AU + Eye Pipeline (`camera_stream_AU_and_ET_new.py`)