added prediction env

This commit is contained in:
Michael Weig 2026-03-10 19:17:08 +01:00
parent 314c4433d3
commit 10fdafa244
3 changed files with 223 additions and 27 deletions

View File

@ -0,0 +1,196 @@
name: 'prediction_env'
channels:
- defaults
- conda-forge
dependencies:
- _py-xgboost-mutex=2.0=cpu_2
- absl-py=2.3.1=py310haa95532_0
- aom=3.12.1=h00a0c3c_0
- arrow-cpp=21.0.0=hcdc3a1c_2
- asttokens=3.0.1=pyhd8ed1ab_0
- astunparse=1.6.3=py_0
- aws-c-auth=0.9.0=h02ab6af_2
- aws-c-cal=0.9.2=h02ab6af_1
- aws-c-common=0.12.4=h02ab6af_0
- aws-c-compression=0.3.1=h02ab6af_2
- aws-c-event-stream=0.5.6=h02ab6af_0
- aws-c-http=0.10.4=h02ab6af_0
- aws-c-io=0.21.4=h02ab6af_0
- aws-c-mqtt=0.13.3=h02ab6af_0
- aws-c-s3=0.8.7=h02ab6af_0
- aws-c-sdkutils=0.2.4=h02ab6af_1
- aws-checksums=0.2.7=h02ab6af_1
- aws-crt-cpp=0.34.0=h885b0b7_0
- aws-sdk-cpp=1.11.638=hf0af688_0
- blas=1.0=mkl
- brotlicffi=1.2.0.0=py310h885b0b7_0
- bzip2=1.0.8=h2bbff1b_6
- c-ares=1.34.6=h2c209ce_0
- ca-certificates=2026.1.4=h4c7d964_0
- cairo=1.18.4=he9e932c_0
- certifi=2026.01.04=py310haa95532_0
- cffi=2.0.0=py310h02ab6af_1
- charset-normalizer=3.4.4=py310haa95532_0
- colorama=0.4.6=pyhd8ed1ab_1
- comm=0.2.3=pyhe01879c_0
- dav1d=1.2.1=h2bbff1b_0
- debugpy=1.8.20=py310h699e580_0
- decorator=5.2.1=pyhd8ed1ab_0
- exceptiongroup=1.3.1=pyhd8ed1ab_0
- executing=2.2.1=pyhd8ed1ab_0
- expat=2.7.4=hd7fb8db_0
- flatbuffers=24.3.25=h21716d4_0
- fontconfig=2.15.0=hd211d86_0
- freeglut=3.8.0=hfcef157_0
- freetype=2.14.1=hfbffc0b_0
- fribidi=1.0.16=haf45083_0
- gast=0.7.0=pyhd3eb1b0_0
- gflags=2.2.2=hd77b12b_1
- giflib=5.2.2=h7edc060_0
- glog=0.5.0=hd77b12b_1
- google-pasta=0.2.0=pyhd3eb1b0_0
- graphite2=1.3.14=hd77b12b_1
- grpcio=1.74.1=py310h5c751cc_0
- h5py=3.15.1=py310he283ef2_1
- harfbuzz=12.3.0=h3ef6528_1
- hdf5=1.14.5=ha36df97_2
- icc_rt=2022.1.0=h6049295_2
- icu=73.1=h6c2663c_0
- idna=3.11=py310haa95532_0
- intel-openmp=2025.0.0=haa95532_1164
- ipykernel=7.2.0=pyh6dadd2b_1
- ipython=8.37.0=pyha7b4d00_0
- jedi=0.19.2=pyhd8ed1ab_1
- joblib=1.5.3=py310haa95532_0
- jpeg=9f=ha349fce_0
- jupyter_client=8.8.0=pyhcf101f3_0
- jupyter_core=5.9.1=pyh6dadd2b_0
- keras=3.11.2=py310h51baaa3_0
- krb5=1.21.3=hdf4eb48_0
- lcms2=2.17=h3732fa5_0
- lerc=4.0.0=h5da7b33_0
- libabseil=20250814.1=cxx17_hcd311fc_0
- libavif=1.3.0=h5bd13ec_0
- libbrotlicommon=1.2.0=h907acca_0
- libbrotlidec=1.2.0=h02c67a5_0
- libbrotlienc=1.2.0=h483e6b9_0
- libcurl=8.17.0=h6e672f4_1
- libdeflate=1.22=h5bf469e_0
- libexpat=2.7.4=hd7fb8db_0
- libffi=3.4.4=hd77b12b_1
- libglib=2.86.3=h9bccc14_0
- libgrpc=1.74.1=hde67744_0
- libhwloc=2.12.1=default_hfa10c62_1000
- libiconv=1.16=h2bbff1b_3
- libkrb5=1.22.1=hb237eb7_0
- libopenjpeg=2.5.4=h02ab6af_1
- libpng=1.6.54=ha15c746_0
- libprotobuf=6.33.0=h2a56892_1
- libre2-11=2025.11.05=ha6b10e7_0
- libsodium=1.0.20=hc70643c_0
- libssh2=1.11.1=h2addb87_0
- libthrift=0.22.0=ha2884a9_0
- libtiff=4.7.1=h3a18249_0
- libwebp-base=1.6.0=hbf3958f_0
- libxgboost=3.1.2=h585ebfc_0
- libxml2=2.13.9=h6201b9f_0
- libzlib=1.3.1=h02ab6af_0
- lz4-c=1.9.4=h2bbff1b_1
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- markdown=3.10=py310haa95532_0
- markdown-it-py=2.2.0=py310haa95532_1
- markupsafe=3.0.2=py310h827c3e9_0
- matplotlib-inline=0.2.1=pyhd8ed1ab_0
- mdurl=0.1.2=py310haa95532_0
- mkl=2025.0.0=h5da7b33_930
- mkl-service=2.5.2=py310h0b37514_0
- mkl_fft=2.1.1=py310h300f80d_0
- mkl_random=1.3.0=py310ha5e6156_0
- ml_dtypes=0.5.4=py310h42c1672_0
- mpi=1.0=msmpi
- mpi4py=4.0.3=py310h02ab6af_1
- msmpi=10.1.1=had4844c_0
- msys2-conda-epoch=20160418=1
- namex=0.1.0=py310haa95532_0
- nest-asyncio=1.6.0=pyhd8ed1ab_1
- numpy-base=2.1.3=py310he4e2855_3
- openssl=3.6.1=hf411b9b_1
- opt_einsum=3.3.0=pyhd3eb1b0_1
- optree=0.18.0=py310h03f52e7_0
- orc=2.2.0=h79e1e1e_1
- packaging=25.0=py310haa95532_1
- paho-mqtt=2.1.0=pyhe01879c_1
- parso=0.8.6=pyhcf101f3_0
- pcre2=10.46=h5740b90_0
- pickleshare=0.7.5=pyhd8ed1ab_1004
- pillow=12.1.0=py310h6b7a805_0
- pip=26.0.1=pyhc872135_0
- pixman=0.46.4=h4043f72_0
- platformdirs=4.9.2=pyhcf101f3_0
- prompt-toolkit=3.0.52=pyha770c72_0
- protobuf=6.33.0=py310ha4c6e68_0
- psutil=7.2.2=py310h1637853_0
- pure_eval=0.2.3=pyhd8ed1ab_1
- py-xgboost=3.1.2=py310haa95532_0
- pyarrow=21.0.0=py310h42c1672_1
- pycparser=2.23=py310haa95532_0
- pygments=2.19.2=pyhd8ed1ab_0
- pysocks=1.7.1=py310haa95532_1
- python=3.10.19=h981015d_0
- python-dateutil=2.9.0.post0=pyhe01879c_2
- python-flatbuffers=24.3.25=py310haa95532_0
- python_abi=3.10=2_cp310
- pywin32=311=py310h282bd7d_1
- pyyaml=6.0.3=py310hb9a58be_0
- pyzmq=27.1.0=py310h535538e_0
- re2=2025.11.05=hc24cdf5_0
- requests=2.32.5=py310haa95532_1
- rich=14.2.0=py310haa95532_0
- scipy=1.15.3=py310h1bbe36f_1
- setuptools=80.10.2=py310haa95532_0
- six=1.17.0=pyhe01879c_1
- snappy=1.2.2=hab6b7b3_1
- sqlite=3.51.1=hda9a48d_0
- stack_data=0.6.3=pyhd8ed1ab_1
- tbb=2022.3.0=h90c84d6_0
- tbb-devel=2022.3.0=h90c84d6_0
- tensorboard=2.20.0=py310haa95532_0
- tensorboard-data-server=0.7.0=py310haa95532_1
- tensorflow=2.20.0=cpu_py310h6605a60_0
- tensorflow-base=2.20.0=cpu_py310hce87ebc_0
- termcolor=3.2.0=py310haa95532_0
- threadpoolctl=3.5.0=py310h4442805_1
- tk=8.6.15=hf199647_0
- tornado=6.5.4=py310h29418f3_0
- traitlets=5.14.3=pyhd8ed1ab_1
- typing-extensions=4.15.0=py310haa95532_0
- typing_extensions=4.15.0=py310haa95532_0
- ucrt=10.0.22621.0=haa95532_0
- urllib3=2.6.3=py310haa95532_0
- utf8proc=2.6.1=h2bbff1b_1
- vc=14.42=haa95532_5
- vc14_runtime=14.44.35208=h4927774_10
- vs2015_runtime=14.44.35208=ha6b5a95_10
- wcwidth=0.6.0=pyhd8ed1ab_0
- werkzeug=3.1.3=py310haa95532_0
- wheel=0.46.3=py310haa95532_0
- win_inet_pton=1.1.0=py310haa95532_1
- wrapt=2.0.1=py310h02ab6af_0
- xgboost=3.1.2=py310haa95532_0
- xz=5.6.4=h4754444_1
- yaml=0.2.5=he774522_0
- zeromq=4.3.5=h5bddc39_9
- zlib=1.3.1=h02ab6af_0
- zstd=1.5.7=h56299aa_0
- pip:
- numpy==1.24.4
- pandas==2.3.0
- pyocclient==0.6
- pytz==2025.2
- scikit-learn==1.6.1
- tzdata==2025.3
prefix: C:\Users\micha\anaconda3\envs\310

View File

@ -97,7 +97,7 @@ Included model families:
Supporting utilities in ```model_training/tools```:
- `scaler.py`: Functions to fit, transform, save and load either MinMaxScaler or StandardScaler, subject-wise and globally - for new subjects, a fallback scaler (built from the mean of all subjects' scaling parameters) is used; see the sketch after this list
- `performance_split.py`: Provides a function to split a group of subjects based on their performance in the AdaBase experiments, using the results created in `researchOnSubjectPerformance.ipynb`. To split into three groups for train, validation & test, call the function twice
- `mad_outlier_removal.py`: Functions to fit and transform data with MAD outlier removal
- `evaluation_tools.py`: Functions for ROC curves and confusion matrices, used especially for Isolation Forest
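As a rough illustration of the fallback mechanism described for `scaler.py` above, the sketch below averages the fitted per-subject scaling parameters into one global scaler. The function name and the way scalers are stored are illustrative assumptions, not the actual `scaler.py` API.
```python
# Minimal sketch of a fallback scaler for unseen subjects (assumed names,
# not the actual scaler.py API): average the fitted per-subject parameters.
import numpy as np
from sklearn.preprocessing import StandardScaler

def build_fallback_scaler(subject_scalers):
    """subject_scalers: dict mapping subject id -> fitted StandardScaler."""
    scalers = list(subject_scalers.values())
    fallback = StandardScaler()
    fallback.mean_ = np.mean([s.mean_ for s in scalers], axis=0)
    fallback.scale_ = np.mean([s.scale_ for s in scalers], axis=0)
    fallback.var_ = fallback.scale_ ** 2
    fallback.n_features_in_ = scalers[0].n_features_in_
    return fallback

# Usage: transform features of a subject that has no fitted scaler of its own.
# X_scaled = build_fallback_scaler(fitted_scalers).transform(X_new_subject)
```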
@ -115,18 +115,18 @@ Main script:
Pipeline:
- Loads runtime config (`predict_pipeline/config.yaml`)
- Pulls latest row from SQLite
- Replaces missing values using the `fallback` map from the config file - if more than 50% of the values need to be replaced, the sample is dropped and `valid` is set to `false`
- Optionally applies scaler (`.pkl`/`.joblib`) - set via config file
- Loads model (`.keras`, `.pkl`, `.joblib`) and predicts
- Publishes JSON payload to MQTT topic
Expected payload form:
```json
{
    "valid": true,   # false only if too many signals are invalid
    "_id": 123,      # the sample ID from the database
    "prediction": 0  # 0 for low load, 1 for high load
}
```
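For orientation, a condensed sketch of the steps above is shown below. All table names, column names, and config keys other than `fallback` are assumptions for illustration; consult `predict_sample.py` and `config.yaml` for the real ones.
```python
# Condensed sketch of the prediction loop (assumed paths, keys and column names).
import json
import sqlite3

import joblib
import pandas as pd
import yaml
from paho.mqtt import publish

with open("predict_pipeline/config.yaml") as f:
    config = yaml.safe_load(f)

# 1) pull the latest row from SQLite
con = sqlite3.connect(config["database"]["path"])
row = pd.read_sql_query(
    f"SELECT * FROM {config['database']['table']} ORDER BY _Id DESC LIMIT 1", con
)

# 2) replace missing values from the fallback map; mark the sample invalid if
#    more than half of the feature values had to be replaced
features = row[config["features"]]                         # assumed key
n_missing = int(features.isna().sum(axis=1).iloc[0])
valid = n_missing <= 0.5 * features.shape[1]
features = features.fillna(config["fallback"])

# 3) optional scaling and model prediction (a .keras model would be loaded
#    with keras.models.load_model instead of joblib)
scaler = joblib.load(config["scaler"]["path"])             # assumed key
model = joblib.load(config["model"]["path"])               # assumed key
prediction = int(model.predict(scaler.transform(features))[0]) if valid else None

# 4) publish the JSON payload to the MQTT topic
payload = {"valid": bool(valid), "_id": int(row["_Id"].iloc[0]), "prediction": prediction}
publish.single(config["mqtt"]["topic"], json.dumps(payload), hostname=config["mqtt"]["host"])
```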
@ -135,11 +135,12 @@ Expected payload form:
Files:
- `predict_pipeline/predict.service`
- `predict_pipeline/predict.timer`
- `predict_pipeline/predict_service_timer_documentation.md`
Role:
- Run inference repeatedly without manual execution
- Timer/service configuration can be customized
More information on how to use and interact with the system service and timer can be found in `predict_service_timer_documentation.md`.
## 5.2 Runtime Configuration
@ -155,23 +156,26 @@ Sections:
- `fallback`: default values for NaN replacement
Important:
- The repository currently uses environment-specific absolute paths in some scripts/configs to ensure functionality on the Ohm-UX driving simulator.
## 5.3 Data and Feature Expectations
Prediction expects SQLite rows containing:
- `_Id`
- `start_time` - this is not yet used for either predictions or messages
- All configured model features (AUs + eye metrics)
Common feature groups (similar to our own dataset):
- `FACE_AUxx_mean` columns
- Fixation counters and duration statistics
- Saccade count/amplitude/duration statistics
- Blink count/duration statistics
- Pupil mean and IPA
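One quick way to check whether a given database fulfills these expectations is to compare the table's columns against the configured feature list. The database path, table name, and column subset below are illustrative assumptions.
```python
# Sanity check (assumed database path, table name, and column subset) that the
# SQLite table exposes the columns the prediction pipeline expects.
import sqlite3

EXPECTED = ["_Id", "start_time", "FACE_AU01_mean", "FACE_AU02_mean",
            "fixation_count", "pupil_mean"]                 # illustrative subset

con = sqlite3.connect("data/features.db")                   # assumed path
cols = {info[1] for info in con.execute("PRAGMA table_info(samples)")}  # assumed table
missing = [c for c in EXPECTED if c not in cols]
print("missing columns:", missing or "none")
```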
## 5.4 Create database from scratch
To (re-)create the custom database for deployment, use `fill_db.ipynb`. Enter the path to your dataset, drop unnecessary columns, and insert a subset of the data with the tool functions from `tools/db_helpers`.
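Conceptually, that workflow boils down to something like the sketch below. The file names, dropped columns, and subset size are assumptions, and the actual notebook uses the helper functions from `tools/db_helpers` rather than plain pandas/sqlite3 calls.
```python
# Conceptual sketch of (re-)creating the deployment database (assumed paths and
# columns; fill_db.ipynb uses tools/db_helpers for the actual inserts).
import sqlite3

import pandas as pd

df = pd.read_parquet("combined_features.parquet")             # path to your dataset (assumed)
df = df.drop(columns=["label", "subject"], errors="ignore")   # drop columns not needed at inference
subset = df.tail(500)                                         # insert only a subset of the data

with sqlite3.connect("data/features.db") as con:              # assumed database path
    subset.to_sql("samples", con, if_exists="replace", index=False)
```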
## 6) Installation and Dependencies
Due to unsolvable dependency conflicts, several environments need to be used at the same time.
### 6.1 Environment for camera handling
@ -179,16 +183,8 @@ TO DO
### 6.2 Environment for predictions
If you want to use the existing deployment on the Ohm-UX driving simulator's Jetson board, activate the conda environment `p310_FS_TF`, a Python 3.10 environment that includes TensorFlow and all other packages required to run `predict_sample.py`.
Otherwise, as described in `readme.md: Setup`, you can use `prediction_env.yaml` to create a new environment that fulfills the requirements.
## 7) Repository File Inventory
@ -201,10 +197,10 @@ Typical key packages in this project:
### Dataset Creation
- `dataset_creation/parquet_file_creation.py` - local files to parquet conversion
- `dataset_creation/create_parquet_files_from_owncloud.py` - ownCloud download + parquet conversion
- `dataset_creation/combined_feature_creation.py` - sliding-window multimodal feature generation (see the sketch after this list)
- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation
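To give an idea of what sliding-window feature generation means here, a toy version with pandas is shown below; the column names, timestamp handling, and 30-second window are assumptions rather than the script's actual parameters.
```python
# Toy sliding-window aggregation (assumed column names and window length).
import pandas as pd

raw = pd.read_parquet("subject_01.parquet")                 # assumed per-subject input file
raw = raw.set_index(pd.to_datetime(raw["timestamp"]))       # assumed timestamp column

# aggregate each signal over a rolling 30-second window, e.g. mean AU intensity
windowed = raw[["AU01", "AU02", "pupil_diameter"]].rolling("30s").mean()
windowed.columns = [f"{c}_mean" for c in windowed.columns]
```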
#### AU Creation
- `dataset_creation/AU_creation/AU_creation_service.py` - AU extraction service workflow

View File

@ -11,11 +11,15 @@ Activate the conda-repository "".
```bash
conda activate
```
Make sure another environment that fulfills `prediction_env.yaml` is available, matching `predict_pipeline/predict.service` - see `predict_pipeline/predict_service_timer_documentation.md`
To get an overview of all available conda environments on your device, use this command in the Anaconda Prompt terminal:
```bash
conda info --envs
```
Create a new environment based on the YAML file:
```bash
conda env create -f prediction_env.yaml
```
### 2) Camera AU + Eye Pipeline (`camera_stream_AU_and_ET_new.py`)