commit 3c58137ae826c6a35d8268bd2fa087accd7c56fd
Author: rennerph
Date:   Tue Nov 26 12:04:28 2024 +0100

    Initial commit

diff --git a/__pycache__/butterworth_filter.cpython-312.pyc b/__pycache__/butterworth_filter.cpython-312.pyc
new file mode 100644
index 0000000..dbe963c
Binary files /dev/null and b/__pycache__/butterworth_filter.cpython-312.pyc differ
diff --git a/__pycache__/ideal_filter.cpython-312.pyc b/__pycache__/ideal_filter.cpython-312.pyc
new file mode 100644
index 0000000..b687744
Binary files /dev/null and b/__pycache__/ideal_filter.cpython-312.pyc differ
diff --git a/__pycache__/parser.cpython-312.pyc b/__pycache__/parser.cpython-312.pyc
new file mode 100644
index 0000000..4f1ad13
Binary files /dev/null and b/__pycache__/parser.cpython-312.pyc differ
diff --git a/__pycache__/pyramid.cpython-312.pyc b/__pycache__/pyramid.cpython-312.pyc
new file mode 100644
index 0000000..bcf3525
Binary files /dev/null and b/__pycache__/pyramid.cpython-312.pyc differ
diff --git a/__pycache__/video.cpython-312.pyc b/__pycache__/video.cpython-312.pyc
new file mode 100644
index 0000000..629f66e
Binary files /dev/null and b/__pycache__/video.cpython-312.pyc differ
diff --git a/butterworth_filter.py b/butterworth_filter.py
new file mode 100644
index 0000000..4226cf0
--- /dev/null
+++ b/butterworth_filter.py
@@ -0,0 +1,150 @@
+import numpy as np
+from scipy.signal import butter, lfilter
+from scipy import fftpack, signal
+import pyramid
+import video
+
+# Temporal bandpass filter with Fast-Fourier Transform
+def fft_filter(video_signal, freq_min, freq_max, fps):
+    fft = fftpack.fft(video_signal, axis=0)
+    frequencies = fftpack.fftfreq(video_signal.shape[0], d=1.0 / fps)
+    bound_low = (np.abs(frequencies - freq_min)).argmin()
+    bound_high = (np.abs(frequencies - freq_max)).argmin()
+
+    # Zero out frequencies outside the desired range
+    fft[:bound_low] = 0
+    fft[bound_high:] = 0
+
+    # Apply inverse FFT to get the filtered video
+    filtered_video = np.abs(fftpack.ifft(fft, axis=0))
+    return filtered_video, fft, frequencies
+
+def find_breath_rate(fft, freqs, freq_min, freq_max):
+    fft_maximums = []
+
+    # Accumulate the FFT amplitude of every bin inside the frequency band
+    for i in range(fft.shape[0]):
+        if freq_min <= freqs[i] <= freq_max:
+            fft_maximums.append(np.abs(fft[i]).max())
+        else:
+            fft_maximums.append(0)
+
+    peaks, _ = signal.find_peaks(fft_maximums)
+
+    if len(peaks) == 0:
+        return 0  # No peaks found
+
+    max_peak = peaks[np.argmax([fft_maximums[peak] for peak in peaks])]
+    return freqs[max_peak] * 60  # Convert frequency (Hz) to breaths per minute (bpm)
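+
+# Example with assumed numbers (illustration only): at fps = 30 with a
+# 300-frame signal, fftfreq() spaces the bins 0.1 Hz apart, so freq_min = 0.1
+# and freq_max = 0.5 zero the spectrum outside bins 1-4; a dominant peak in
+# bin 3 (0.3 Hz) then reads as 0.3 * 60 = 18 breaths per minute.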
+ """ + low = lowcut / (0.5 * fs) # Normalize the frequencies by Nyquist frequency + high = highcut / (0.5 * fs) + b, a = butter(order, [low, high], btype='band') + return b, a + +def apply_butter(laplace_video_list, levels, alpha, cutoff=20, low=0.1, high=0.5, fps=30, width=512, height=512, linearAttenuation=True): + """ + Applies the Butterworth filter to the video sequence, magnifies the filtered video sequence, + and attenuates spatial frequencies for breath detection. + + :param laplace_video_list: Laplace video pyramid. + :param levels: Pyramid levels. + :param alpha: Magnification factor. + :param cutoff: Spatial frequencies cutoff factor. + :param low: Temporal low frequency cutoff (related to the breath rate). + :param high: Temporal high frequency cutoff. + :param fps: Video frame rate. + :param width: Video frame width. + :param height: Video frame height. + :param linearAttenuation: Whether to apply linear attenuation. + :return: List of filtered video frames. + """ + print('Applying Butterworth filter...') + filtered_video_list = [] + b, a = butter_bandpass(low, high, fps, order=1) + + # Spatial wavelength (lambda) + lambda1 = (width ** 2 + height ** 2) ** 0.5 + delta = cutoff / 8 / (1 + alpha) + + for i in range(levels): # Iterate through pyramid levels + current_alpha = lambda1 / (8 * delta) - 1 # Alpha calculation + current_alpha /= 2 + + # Apply the Butterworth filter to the temporal image sequence + filtered = lfilter(b, a, laplace_video_list[i], axis=0) + + # Ignore the lowest and highest pyramid levels + if i == levels - 1 or i == 0: + filtered *= 0 + + # Spatial frequencies attenuation + if current_alpha > alpha: + filtered *= alpha + else: + filtered *= current_alpha if linearAttenuation else 0 + + filtered_video_list.append(filtered) + lambda1 /= 2 # Decrease lambda for the next level + + return filtered_video_list + +def start(video_frames, alpha=50, cutoff=16, low=0.1, high=0.5, linearAttenuation=True, chromAttenuation=0.4, fps=30, width=512, height=512, time_window = 5): + """ + Performs motion magnification on the video frames by applying Butterworth bandpass filter and saves the output video. + This can be used for detecting breathing patterns. + + :param video_frames: Numpy array containing video frames (shape: [num_frames, height, width, channels]) + :param alpha: Magnification factor (e.g., 50 for subtle motion like breathing). + :param cutoff: Spatial frequencies cutoff factor. + :param low: Temporal low frequency cutoff (e.g., 0.1 Hz for breaths). + :param high: Temporal high frequency cutoff (e.g., 0.5 Hz for breaths). + :param fps: Frames per second of the video. + :param width: Width of the video frames. + :param height: Height of the video frames. + :return: Filtered video frames and estimated breathing rate. 
+ """ + + # Convert RGB to YIQ for luminance-based processing + yiq_video = video.rgb2yiq(video_frames) + + # Determine pyramid levels for the video + levels = video.calculate_pyramid_levels(width, height) + + # Build Laplacian pyramid for each video frame + lap_video_list = pyramid.laplacian_video_pyramid(yiq_video, levels) + + # Apply Butterworth filter for breath frequencies (low=0.1 Hz, high=0.5 Hz) + filtered_video_list = apply_butter(lap_video_list, levels, alpha, cutoff, low, high, fps, width, height, linearAttenuation) + + # Reconstruct the magnified video from the filtered pyramid + final_video = pyramid.reconstruct(filtered_video_list, levels) + filtered_video, fft_data, frequencies = fft_filter(final_video.mean(axis=(1, 2, 3)), low, high, fps) + bpm = find_breath_rate(fft_data, frequencies, low, high) + + final_video += yiq_video + final_rgb = video.yiq2rgb(final_video) + + final_rgb = np.clip(final_rgb, 0, 255).astype(np.uint8) + + return final_rgb, bpm + + + + diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..303d083 --- /dev/null +++ b/config.ini @@ -0,0 +1,12 @@ +[Parameters] +alpha = 20 +low = 60 +high = 120 +chromattenuation = 0.1 +mode = some_mode + +[Video] +width = 1280 +height = 720 +fps = 30.0 + diff --git a/deploy.prototxt b/deploy.prototxt new file mode 100644 index 0000000..a128515 --- /dev/null +++ b/deploy.prototxt @@ -0,0 +1,1790 @@ +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} + +layer { + name: "data_bn" + type: "BatchNorm" + bottom: "data" + top: "data_bn" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "data_scale" + type: "Scale" + bottom: "data_bn" + top: "data_bn" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_h" + type: "Convolution" + bottom: "data_bn" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + pad: 3 + kernel_size: 7 + stride: 2 + weight_filler { + type: "msra" + variance_norm: FAN_OUT + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv1_bn_h" + type: "BatchNorm" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "conv1_scale_h" + type: "Scale" + bottom: "conv1_h" + top: "conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "conv1_relu" + type: "ReLU" + bottom: "conv1_h" + top: "conv1_h" +} +layer { + name: "conv1_pool" + type: "Pooling" + bottom: "conv1_h" + top: "conv1_pool" + pooling_param { + kernel_size: 3 + stride: 2 + } +} +layer { + name: "layer_64_1_conv1_h" + type: "Convolution" + bottom: "conv1_pool" + top: "layer_64_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_bn2_h" + type: "BatchNorm" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_64_1_scale2_h" + type: "Scale" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" + param { + 
lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_64_1_relu2" + type: "ReLU" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv1_h" +} +layer { + name: "layer_64_1_conv2_h" + type: "Convolution" + bottom: "layer_64_1_conv1_h" + top: "layer_64_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_64_1_sum" + type: "Eltwise" + bottom: "layer_64_1_conv2_h" + bottom: "conv1_pool" + top: "layer_64_1_sum" +} +layer { + name: "layer_128_1_bn1_h" + type: "BatchNorm" + bottom: "layer_64_1_sum" + top: "layer_128_1_bn1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale1_h" + type: "Scale" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu1" + type: "ReLU" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_bn1_h" +} +layer { + name: "layer_128_1_conv1_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_bn2" + type: "BatchNorm" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_128_1_scale2" + type: "Scale" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_128_1_relu2" + type: "ReLU" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv1_h" +} +layer { + name: "layer_128_1_conv2" + type: "Convolution" + bottom: "layer_128_1_conv1_h" + top: "layer_128_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_conv_expand_h" + type: "Convolution" + bottom: "layer_128_1_bn1_h" + top: "layer_128_1_conv_expand_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_128_1_sum" + type: "Eltwise" + bottom: "layer_128_1_conv2" + bottom: "layer_128_1_conv_expand_h" + top: "layer_128_1_sum" +} +layer { + name: "layer_256_1_bn1" + type: "BatchNorm" + bottom: "layer_128_1_sum" + top: "layer_256_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale1" + type: "Scale" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + 
name: "layer_256_1_relu1" + type: "ReLU" + bottom: "layer_256_1_bn1" + top: "layer_256_1_bn1" +} +layer { + name: "layer_256_1_conv1" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_bn2" + type: "BatchNorm" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_256_1_scale2" + type: "Scale" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_256_1_relu2" + type: "ReLU" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv1" +} +layer { + name: "layer_256_1_conv2" + type: "Convolution" + bottom: "layer_256_1_conv1" + top: "layer_256_1_conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_conv_expand" + type: "Convolution" + bottom: "layer_256_1_bn1" + top: "layer_256_1_conv_expand" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_256_1_sum" + type: "Eltwise" + bottom: "layer_256_1_conv2" + bottom: "layer_256_1_conv_expand" + top: "layer_256_1_sum" +} +layer { + name: "layer_512_1_bn1" + type: "BatchNorm" + bottom: "layer_256_1_sum" + top: "layer_512_1_bn1" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale1" + type: "Scale" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu1" + type: "ReLU" + bottom: "layer_512_1_bn1" + top: "layer_512_1_bn1" +} +layer { + name: "layer_512_1_conv1_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 # 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_bn2_h" + type: "BatchNorm" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "layer_512_1_scale2_h" + type: "Scale" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "layer_512_1_relu2" + type: "ReLU" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv1_h" +} +layer { + name: "layer_512_1_conv2_h" + type: "Convolution" + bottom: "layer_512_1_conv1_h" + top: "layer_512_1_conv2_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param 
{ + num_output: 256 + bias_term: false + pad: 2 # 1 + kernel_size: 3 + stride: 1 + dilation: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_conv_expand_h" + type: "Convolution" + bottom: "layer_512_1_bn1" + top: "layer_512_1_conv_expand_h" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 # 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "layer_512_1_sum" + type: "Eltwise" + bottom: "layer_512_1_conv2_h" + bottom: "layer_512_1_conv_expand_h" + top: "layer_512_1_sum" +} +layer { + name: "last_bn_h" + type: "BatchNorm" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } + param { + lr_mult: 0.0 + } +} +layer { + name: "last_scale_h" + type: "Scale" + bottom: "layer_512_1_sum" + top: "layer_512_1_sum" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 1.0 + } + scale_param { + bias_term: true + } +} +layer { + name: "last_relu" + type: "ReLU" + bottom: "layer_512_1_sum" + top: "fc7" +} + +layer { + name: "conv6_1_h" + type: "Convolution" + bottom: "fc7" + top: "conv6_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1_h" + top: "conv6_1_h" +} +layer { + name: "conv6_2_h" + type: "Convolution" + bottom: "conv6_1_h" + top: "conv6_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2_h" + top: "conv6_2_h" +} +layer { + name: "conv7_1_h" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv7_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1_h" + top: "conv7_1_h" +} +layer { + name: "conv7_2_h" + type: "Convolution" + bottom: "conv7_1_h" + top: "conv7_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2_h" + top: "conv7_2_h" +} +layer { + name: "conv8_1_h" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv8_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1_h" + top: "conv8_1_h" +} +layer { + name: "conv8_2_h" + type: "Convolution" + bottom: "conv8_1_h" + top: "conv8_2_h" + param { + lr_mult: 
1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2_h" + top: "conv8_2_h" +} +layer { + name: "conv9_1_h" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv9_1_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1_h" + top: "conv9_1_h" +} +layer { + name: "conv9_2_h" + type: "Convolution" + bottom: "conv9_1_h" + top: "conv9_2_h" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2_h" + top: "conv9_2_h" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "layer_256_1_bn1" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + 
weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2_h" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2_h" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: 
"constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2_h" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 12 # 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2_h" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2_h" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2_h" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: 
"constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2_h" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 8 # 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2_h" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} + +layer { + name: "mbox_conf_reshape" + type: "Reshape" + bottom: "mbox_conf" + top: "mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: -1 + dim: 2 + } + } +} +layer { + name: "mbox_conf_softmax" + type: "Softmax" + bottom: "mbox_conf_reshape" + top: "mbox_conf_softmax" + softmax_param { + axis: 2 + } +} +layer { + name: "mbox_conf_flatten" + type: "Flatten" + bottom: "mbox_conf_softmax" + top: "mbox_conf_flatten" + flatten_param { + axis: 1 + } +} + +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "mbox_loc" + bottom: "mbox_conf_flatten" + bottom: "mbox_priorbox" + top: "detection_out" + include { + phase: TEST + } + detection_output_param { + num_classes: 2 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.45 + top_k: 400 + } + code_type: CENTER_SIZE + keep_top_k: 200 + confidence_threshold: 0.01 + clip: 1 + } +} diff --git a/deploy_upperbody.prototxt b/deploy_upperbody.prototxt new file mode 100644 index 0000000..88021a9 --- /dev/null +++ b/deploy_upperbody.prototxt @@ -0,0 +1,3102 @@ +name: "MobileNet-SSD" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} +layer { + name: "conv0" + type: 
"Convolution" + bottom: "data" + top: "conv0" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv0/bn" + type: "BatchNorm" + bottom: "conv0" + top: "conv0" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv0/scale" + type: "Scale" + bottom: "conv0" + top: "conv0" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv0/relu" + type: "ReLU" + bottom: "conv0" + top: "conv0" +} +layer { + name: "conv1/dw" + type: "Convolution" + bottom: "conv0" + top: "conv1/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 1 + kernel_size: 3 + group: 32 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv1/dw/bn" + type: "BatchNorm" + bottom: "conv1/dw" + top: "conv1/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv1/dw/scale" + type: "Scale" + bottom: "conv1/dw" + top: "conv1/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv1/dw/relu" + type: "ReLU" + bottom: "conv1/dw" + top: "conv1/dw" +} +layer { + name: "conv1" + type: "Convolution" + bottom: "conv1/dw" + top: "conv1" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 64 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv1/bn" + type: "BatchNorm" + bottom: "conv1" + top: "conv1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv1/scale" + type: "Scale" + bottom: "conv1" + top: "conv1" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv1/relu" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "conv2/dw" + type: "Convolution" + bottom: "conv1" + top: "conv2/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + group: 64 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv2/dw/bn" + type: "BatchNorm" + bottom: "conv2/dw" + top: "conv2/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv2/dw/scale" + type: "Scale" + bottom: "conv2/dw" + top: "conv2/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv2/dw/relu" + type: "ReLU" + bottom: "conv2/dw" + top: "conv2/dw" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "conv2/dw" + top: "conv2" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + 
num_output: 128 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv2/bn" + type: "BatchNorm" + bottom: "conv2" + top: "conv2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv2/scale" + type: "Scale" + bottom: "conv2" + top: "conv2" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv2/relu" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "conv3/dw" + type: "Convolution" + bottom: "conv2" + top: "conv3/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + group: 128 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv3/dw/bn" + type: "BatchNorm" + bottom: "conv3/dw" + top: "conv3/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv3/dw/scale" + type: "Scale" + bottom: "conv3/dw" + top: "conv3/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv3/dw/relu" + type: "ReLU" + bottom: "conv3/dw" + top: "conv3/dw" +} +layer { + name: "conv3" + type: "Convolution" + bottom: "conv3/dw" + top: "conv3" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv3/bn" + type: "BatchNorm" + bottom: "conv3" + top: "conv3" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv3/scale" + type: "Scale" + bottom: "conv3" + top: "conv3" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv3/relu" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4/dw" + type: "Convolution" + bottom: "conv3" + top: "conv4/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + group: 128 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv4/dw/bn" + type: "BatchNorm" + bottom: "conv4/dw" + top: "conv4/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv4/dw/scale" + type: "Scale" + bottom: "conv4/dw" + top: "conv4/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv4/dw/relu" + type: "ReLU" + bottom: "conv4/dw" + top: "conv4/dw" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv4/dw" + top: "conv4" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv4/bn" + type: 
"BatchNorm" + bottom: "conv4" + top: "conv4" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv4/scale" + type: "Scale" + bottom: "conv4" + top: "conv4" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv4/relu" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5/dw" + type: "Convolution" + bottom: "conv4" + top: "conv5/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + group: 256 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv5/dw/bn" + type: "BatchNorm" + bottom: "conv5/dw" + top: "conv5/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv5/dw/scale" + type: "Scale" + bottom: "conv5/dw" + top: "conv5/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv5/dw/relu" + type: "ReLU" + bottom: "conv5/dw" + top: "conv5/dw" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv5/dw" + top: "conv5" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv5/bn" + type: "BatchNorm" + bottom: "conv5" + top: "conv5" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv5/scale" + type: "Scale" + bottom: "conv5" + top: "conv5" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv5/relu" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} +layer { + name: "conv6/dw" + type: "Convolution" + bottom: "conv5" + top: "conv6/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + group: 256 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv6/dw/bn" + type: "BatchNorm" + bottom: "conv6/dw" + top: "conv6/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv6/dw/scale" + type: "Scale" + bottom: "conv6/dw" + top: "conv6/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv6/dw/relu" + type: "ReLU" + bottom: "conv6/dw" + top: "conv6/dw" +} +layer { + name: "conv6" + type: "Convolution" + bottom: "conv6/dw" + top: "conv6" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv6/bn" + type: "BatchNorm" + bottom: "conv6" + top: "conv6" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { 
+ lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv6/scale" + type: "Scale" + bottom: "conv6" + top: "conv6" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv6/relu" + type: "ReLU" + bottom: "conv6" + top: "conv6" +} +layer { + name: "conv7/dw" + type: "Convolution" + bottom: "conv6" + top: "conv7/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv7/dw/bn" + type: "BatchNorm" + bottom: "conv7/dw" + top: "conv7/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv7/dw/scale" + type: "Scale" + bottom: "conv7/dw" + top: "conv7/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv7/dw/relu" + type: "ReLU" + bottom: "conv7/dw" + top: "conv7/dw" +} +layer { + name: "conv7" + type: "Convolution" + bottom: "conv7/dw" + top: "conv7" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv7/bn" + type: "BatchNorm" + bottom: "conv7" + top: "conv7" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv7/scale" + type: "Scale" + bottom: "conv7" + top: "conv7" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv7/relu" + type: "ReLU" + bottom: "conv7" + top: "conv7" +} +layer { + name: "conv8/dw" + type: "Convolution" + bottom: "conv7" + top: "conv8/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv8/dw/bn" + type: "BatchNorm" + bottom: "conv8/dw" + top: "conv8/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv8/dw/scale" + type: "Scale" + bottom: "conv8/dw" + top: "conv8/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv8/dw/relu" + type: "ReLU" + bottom: "conv8/dw" + top: "conv8/dw" +} +layer { + name: "conv8" + type: "Convolution" + bottom: "conv8/dw" + top: "conv8" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv8/bn" + type: "BatchNorm" + bottom: "conv8" + top: "conv8" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv8/scale" + type: "Scale" + bottom: "conv8" + top: "conv8" + param { + lr_mult: 0.1 + 
decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv8/relu" + type: "ReLU" + bottom: "conv8" + top: "conv8" +} +layer { + name: "conv9/dw" + type: "Convolution" + bottom: "conv8" + top: "conv9/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv9/dw/bn" + type: "BatchNorm" + bottom: "conv9/dw" + top: "conv9/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv9/dw/scale" + type: "Scale" + bottom: "conv9/dw" + top: "conv9/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv9/dw/relu" + type: "ReLU" + bottom: "conv9/dw" + top: "conv9/dw" +} +layer { + name: "conv9" + type: "Convolution" + bottom: "conv9/dw" + top: "conv9" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv9/bn" + type: "BatchNorm" + bottom: "conv9" + top: "conv9" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv9/scale" + type: "Scale" + bottom: "conv9" + top: "conv9" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv9/relu" + type: "ReLU" + bottom: "conv9" + top: "conv9" +} +layer { + name: "conv10/dw" + type: "Convolution" + bottom: "conv9" + top: "conv10/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv10/dw/bn" + type: "BatchNorm" + bottom: "conv10/dw" + top: "conv10/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv10/dw/scale" + type: "Scale" + bottom: "conv10/dw" + top: "conv10/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv10/dw/relu" + type: "ReLU" + bottom: "conv10/dw" + top: "conv10/dw" +} +layer { + name: "conv10" + type: "Convolution" + bottom: "conv10/dw" + top: "conv10" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv10/bn" + type: "BatchNorm" + bottom: "conv10" + top: "conv10" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv10/scale" + type: "Scale" + bottom: "conv10" + top: "conv10" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + 
bias_filler { + value: 0 + } + } +} +layer { + name: "conv10/relu" + type: "ReLU" + bottom: "conv10" + top: "conv10" +} +layer { + name: "conv11/dw" + type: "Convolution" + bottom: "conv10" + top: "conv11/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv11/dw/bn" + type: "BatchNorm" + bottom: "conv11/dw" + top: "conv11/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv11/dw/scale" + type: "Scale" + bottom: "conv11/dw" + top: "conv11/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv11/dw/relu" + type: "ReLU" + bottom: "conv11/dw" + top: "conv11/dw" +} +layer { + name: "conv11" + type: "Convolution" + bottom: "conv11/dw" + top: "conv11" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv11/bn" + type: "BatchNorm" + bottom: "conv11" + top: "conv11" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv11/scale" + type: "Scale" + bottom: "conv11" + top: "conv11" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv11/relu" + type: "ReLU" + bottom: "conv11" + top: "conv11" +} +layer { + name: "conv12/dw" + type: "Convolution" + bottom: "conv11" + top: "conv12/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + group: 512 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv12/dw/bn" + type: "BatchNorm" + bottom: "conv12/dw" + top: "conv12/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv12/dw/scale" + type: "Scale" + bottom: "conv12/dw" + top: "conv12/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv12/dw/relu" + type: "ReLU" + bottom: "conv12/dw" + top: "conv12/dw" +} +layer { + name: "conv12" + type: "Convolution" + bottom: "conv12/dw" + top: "conv12" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 1024 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv12/bn" + type: "BatchNorm" + bottom: "conv12" + top: "conv12" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv12/scale" + type: "Scale" + bottom: "conv12" + top: "conv12" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv12/relu" + type: "ReLU" + bottom: 
"conv12" + top: "conv12" +} +layer { + name: "conv13/dw" + type: "Convolution" + bottom: "conv12" + top: "conv13/dw" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 1024 + bias_term: false + pad: 1 + kernel_size: 3 + group: 1024 + #engine: CAFFE + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv13/dw/bn" + type: "BatchNorm" + bottom: "conv13/dw" + top: "conv13/dw" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv13/dw/scale" + type: "Scale" + bottom: "conv13/dw" + top: "conv13/dw" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv13/dw/relu" + type: "ReLU" + bottom: "conv13/dw" + top: "conv13/dw" +} +layer { + name: "conv13" + type: "Convolution" + bottom: "conv13/dw" + top: "conv13" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 1024 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv13/bn" + type: "BatchNorm" + bottom: "conv13" + top: "conv13" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv13/scale" + type: "Scale" + bottom: "conv13" + top: "conv13" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv13/relu" + type: "ReLU" + bottom: "conv13" + top: "conv13" +} +layer { + name: "conv14_1" + type: "Convolution" + bottom: "conv13" + top: "conv14_1" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv14_1/bn" + type: "BatchNorm" + bottom: "conv14_1" + top: "conv14_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv14_1/scale" + type: "Scale" + bottom: "conv14_1" + top: "conv14_1" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv14_1/relu" + type: "ReLU" + bottom: "conv14_1" + top: "conv14_1" +} +layer { + name: "conv14_2" + type: "Convolution" + bottom: "conv14_1" + top: "conv14_2" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 512 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv14_2/bn" + type: "BatchNorm" + bottom: "conv14_2" + top: "conv14_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv14_2/scale" + type: "Scale" + bottom: "conv14_2" + top: "conv14_2" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv14_2/relu" + type: "ReLU" + bottom: "conv14_2" + top: "conv14_2" +} +layer { + name: "conv15_1" + type: "Convolution" + bottom: "conv14_2" + top: 
"conv15_1" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv15_1/bn" + type: "BatchNorm" + bottom: "conv15_1" + top: "conv15_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv15_1/scale" + type: "Scale" + bottom: "conv15_1" + top: "conv15_1" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv15_1/relu" + type: "ReLU" + bottom: "conv15_1" + top: "conv15_1" +} +layer { + name: "conv15_2" + type: "Convolution" + bottom: "conv15_1" + top: "conv15_2" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv15_2/bn" + type: "BatchNorm" + bottom: "conv15_2" + top: "conv15_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv15_2/scale" + type: "Scale" + bottom: "conv15_2" + top: "conv15_2" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv15_2/relu" + type: "ReLU" + bottom: "conv15_2" + top: "conv15_2" +} +layer { + name: "conv16_1" + type: "Convolution" + bottom: "conv15_2" + top: "conv16_1" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv16_1/bn" + type: "BatchNorm" + bottom: "conv16_1" + top: "conv16_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv16_1/scale" + type: "Scale" + bottom: "conv16_1" + top: "conv16_1" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv16_1/relu" + type: "ReLU" + bottom: "conv16_1" + top: "conv16_1" +} +layer { + name: "conv16_2" + type: "Convolution" + bottom: "conv16_1" + top: "conv16_2" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 256 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv16_2/bn" + type: "BatchNorm" + bottom: "conv16_2" + top: "conv16_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv16_2/scale" + type: "Scale" + bottom: "conv16_2" + top: "conv16_2" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv16_2/relu" + type: "ReLU" + bottom: "conv16_2" + top: "conv16_2" +} +layer { + name: "conv17_1" + type: "Convolution" + bottom: "conv16_2" + top: "conv17_1" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 64 + bias_term: false 
+ kernel_size: 1 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv17_1/bn" + type: "BatchNorm" + bottom: "conv17_1" + top: "conv17_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv17_1/scale" + type: "Scale" + bottom: "conv17_1" + top: "conv17_1" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv17_1/relu" + type: "ReLU" + bottom: "conv17_1" + top: "conv17_1" +} +layer { + name: "conv17_2" + type: "Convolution" + bottom: "conv17_1" + top: "conv17_2" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + } +} +layer { + name: "conv17_2/bn" + type: "BatchNorm" + bottom: "conv17_2" + top: "conv17_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } +} +layer { + name: "conv17_2/scale" + type: "Scale" + bottom: "conv17_2" + top: "conv17_2" + param { + lr_mult: 0.1 + decay_mult: 0.0 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + scale_param { + filler { + value: 1 + } + bias_term: true + bias_filler { + value: 0 + } + } +} +layer { + name: "conv17_2/relu" + type: "ReLU" + bottom: "conv17_2" + top: "conv17_2" +} +layer { + name: "conv11_mbox_loc" + type: "Convolution" + bottom: "conv11" + top: "conv11_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 12 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv11_mbox_loc_perm" + type: "Permute" + bottom: "conv11_mbox_loc" + top: "conv11_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv11_mbox_loc_flat" + type: "Flatten" + bottom: "conv11_mbox_loc_perm" + top: "conv11_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv11_mbox_conf" + type: "Convolution" + bottom: "conv11" + top: "conv11_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 63 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv11_mbox_conf_perm" + type: "Permute" + bottom: "conv11_mbox_conf" + top: "conv11_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv11_mbox_conf_flat" + type: "Flatten" + bottom: "conv11_mbox_conf_perm" + top: "conv11_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv11_mbox_priorbox" + type: "PriorBox" + bottom: "conv11" + bottom: "data" + top: "conv11_mbox_priorbox" + prior_box_param { + min_size: 60.0 + aspect_ratio: 2.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "conv13_mbox_loc" + type: "Convolution" + bottom: "conv13" + top: "conv13_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} 
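+# Sizing note for the SSD heads below (standard SSD arithmetic, stated here as a comment):
+# every "*_mbox_loc" layer predicts 4 box offsets per prior and every "*_mbox_conf" layer
+# one score per prior for each of the 21 classes, so num_output = priors * 4 (loc) and
+# priors * 21 (conf). conv11 uses 3 priors (12 / 63); conv13 and the conv14_2...conv17_2
+# heads use 6 priors (24 / 126).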
+layer { + name: "conv13_mbox_loc_perm" + type: "Permute" + bottom: "conv13_mbox_loc" + top: "conv13_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv13_mbox_loc_flat" + type: "Flatten" + bottom: "conv13_mbox_loc_perm" + top: "conv13_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv13_mbox_conf" + type: "Convolution" + bottom: "conv13" + top: "conv13_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv13_mbox_conf_perm" + type: "Permute" + bottom: "conv13_mbox_conf" + top: "conv13_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv13_mbox_conf_flat" + type: "Flatten" + bottom: "conv13_mbox_conf_perm" + top: "conv13_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv13_mbox_priorbox" + type: "PriorBox" + bottom: "conv13" + bottom: "data" + top: "conv13_mbox_priorbox" + prior_box_param { + min_size: 105.0 + max_size: 150.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "conv14_2_mbox_loc" + type: "Convolution" + bottom: "conv14_2" + top: "conv14_2_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_2_mbox_loc_perm" + type: "Permute" + bottom: "conv14_2_mbox_loc" + top: "conv14_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv14_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv14_2_mbox_loc_perm" + top: "conv14_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv14_2_mbox_conf" + type: "Convolution" + bottom: "conv14_2" + top: "conv14_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_2_mbox_conf_perm" + type: "Permute" + bottom: "conv14_2_mbox_conf" + top: "conv14_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv14_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv14_2_mbox_conf_perm" + top: "conv14_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv14_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv14_2" + bottom: "data" + top: "conv14_2_mbox_priorbox" + prior_box_param { + min_size: 150.0 + max_size: 195.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "conv15_2_mbox_loc" + type: "Convolution" + bottom: "conv15_2" + top: "conv15_2_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_2_mbox_loc_perm" + type: "Permute" + bottom: 
"conv15_2_mbox_loc" + top: "conv15_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv15_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv15_2_mbox_loc_perm" + top: "conv15_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv15_2_mbox_conf" + type: "Convolution" + bottom: "conv15_2" + top: "conv15_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_2_mbox_conf_perm" + type: "Permute" + bottom: "conv15_2_mbox_conf" + top: "conv15_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv15_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv15_2_mbox_conf_perm" + top: "conv15_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv15_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv15_2" + bottom: "data" + top: "conv15_2_mbox_priorbox" + prior_box_param { + min_size: 195.0 + max_size: 240.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "conv16_2_mbox_loc" + type: "Convolution" + bottom: "conv16_2" + top: "conv16_2_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_2_mbox_loc_perm" + type: "Permute" + bottom: "conv16_2_mbox_loc" + top: "conv16_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv16_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv16_2_mbox_loc_perm" + top: "conv16_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv16_2_mbox_conf" + type: "Convolution" + bottom: "conv16_2" + top: "conv16_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_2_mbox_conf_perm" + type: "Permute" + bottom: "conv16_2_mbox_conf" + top: "conv16_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv16_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv16_2_mbox_conf_perm" + top: "conv16_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv16_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv16_2" + bottom: "data" + top: "conv16_2_mbox_priorbox" + prior_box_param { + min_size: 240.0 + max_size: 285.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "conv17_2_mbox_loc" + type: "Convolution" + bottom: "conv17_2" + top: "conv17_2_mbox_loc" + param { + lr_mult: 0.1 + decay_mult: 0.1 + } + param { + lr_mult: 0.2 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_2_mbox_loc_perm" + type: "Permute" + bottom: "conv17_2_mbox_loc" + top: 
"conv17_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv17_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv17_2_mbox_loc_perm" + top: "conv17_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv17_2_mbox_conf" + type: "Convolution" + bottom: "conv17_2" + top: "conv17_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_2_mbox_conf_perm" + type: "Permute" + bottom: "conv17_2_mbox_conf" + top: "conv17_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv17_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv17_2_mbox_conf_perm" + top: "conv17_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv17_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv17_2" + bottom: "data" + top: "conv17_2_mbox_priorbox" + prior_box_param { + min_size: 285.0 + max_size: 300.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv11_mbox_loc_flat" + bottom: "conv13_mbox_loc_flat" + bottom: "conv14_2_mbox_loc_flat" + bottom: "conv15_2_mbox_loc_flat" + bottom: "conv16_2_mbox_loc_flat" + bottom: "conv17_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv11_mbox_conf_flat" + bottom: "conv13_mbox_conf_flat" + bottom: "conv14_2_mbox_conf_flat" + bottom: "conv15_2_mbox_conf_flat" + bottom: "conv16_2_mbox_conf_flat" + bottom: "conv17_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv11_mbox_priorbox" + bottom: "conv13_mbox_priorbox" + bottom: "conv14_2_mbox_priorbox" + bottom: "conv15_2_mbox_priorbox" + bottom: "conv16_2_mbox_priorbox" + bottom: "conv17_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} +layer { + name: "mbox_conf_reshape" + type: "Reshape" + bottom: "mbox_conf" + top: "mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: -1 + dim: 21 + } + } +} +layer { + name: "mbox_conf_softmax" + type: "Softmax" + bottom: "mbox_conf_reshape" + top: "mbox_conf_softmax" + softmax_param { + axis: 2 + } +} +layer { + name: "mbox_conf_flatten" + type: "Flatten" + bottom: "mbox_conf_softmax" + top: "mbox_conf_flatten" + flatten_param { + axis: 1 + } +} +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "mbox_loc" + bottom: "mbox_conf_flatten" + bottom: "mbox_priorbox" + top: "detection_out" + include { + phase: TEST + } + detection_output_param { + num_classes: 21 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.45 + top_k: 100 + } + code_type: CENTER_SIZE + keep_top_k: 100 + confidence_threshold: 0.25 + } +} diff --git a/gui.ui b/gui.ui new file mode 100644 index 0000000..74c0306 --- /dev/null +++ b/gui.ui @@ -0,0 +1,187 @@ + + + Dialog + + + + 0 + 0 + 1151 + 676 + + + + Dialog + + + + + 370 + 30 + 691 + 521 + + + + + + + + + + 340 + 610 + 85 + 27 + + + + Play Video + + + + + + 20 + 30 + 326 + 521 + + + + + + + + + Load Video + + + + + + + + + + + + + + + + + Motion Magnification + + + + + Color Magnification + + + + + + + + + + 
Magnification Factor + + + + + + + + + + + + + + Spatial Frequency Cutoff + + + + + + + + + + + + + + Temporal Frequency Passband + + + + + + + + + + + + + + + + + + + + + + + + + Chromatic Attenuation + + + + + + + 1.000000000000000 + + + 0.100000000000000 + + + + + + + + + Linear Attenuation + + + + + + + Start + + + + + + + + + + Qt::AlignCenter + + + + + + + + + diff --git a/ideal_filter.py b/ideal_filter.py new file mode 100644 index 0000000..5976e9a --- /dev/null +++ b/ideal_filter.py @@ -0,0 +1,108 @@ +import scipy.fftpack as fftpack +import numpy as np +import cv2 +from scipy.signal import find_peaks + +import pyramid +import video + +def start(vidFile, alpha, low, high, chromAttenuation, fps, width, height): + ''' + Performs color magnification on the video by applying an ideal bandpass filter: + it applies a discrete Fourier transform to the Gaussian-downsampled video, + cuts off the frequencies outside the passband, magnifies the result and + adds it back onto the original. Additionally, it detects the heartbeat and returns the BPM. + + :param vidFile: Input video frames (RGB) + :param alpha: Magnification factor + :param low: Low frequency cut-off + :param high: High frequency cut-off + :param chromAttenuation: Chrominance attenuation factor + :param fps: Frames per second of the video + :param width: Width of the video frame + :param height: Height of the video frame + :return: final processed video, heart rate in BPM + ''' + + # Convert from RGB to YIQ for better processing of chrominance information + t = video.rgb2yiq(vidFile) + + levels = 4 + + # Build Gaussian pyramid and use the highest level + gauss_video_list = pyramid.gaussian_video(t, levels) + + print('Apply Ideal filter') + # Apply discrete Fourier transformation (real) + fft = fftpack.rfft(gauss_video_list, axis=0) + frequencies = fftpack.rfftfreq(fft.shape[0], d=1.0 / fps) # Sample frequencies + mask = np.logical_and(frequencies > low, frequencies < high) # Boolean mask of the frequencies inside the passband + + fft[~mask] = 0 # Cut off values outside the passband + + filtered = fftpack.irfft(fft, axis=0) # Inverse Fourier transformation + + filtered *= alpha # Magnification + + # Chromatic attenuation + filtered[:, :, :, 1] *= chromAttenuation + filtered[:, :, :, 2] *= chromAttenuation + + # Resize last Gaussian level to the frame size + filtered_video_list = np.zeros(t.shape) + for i in range(t.shape[0]): + f = filtered[i] + filtered_video_list[i] = cv2.resize(f, (t.shape[2], t.shape[1])) + + final = filtered_video_list + + # Add to original + final += t + + # Convert back from YIQ to RGB + final = video.yiq2rgb(final) + + # Cut off invalid values + final[final < 0] = 0 + final[final > 255] = 255 + + # Detect heartbeat and return BPM + bpm = detect_heartbeat(filtered_video_list, fps) + + return final, bpm + + +def detect_heartbeat(video_frames, fps): + ''' + Detects the heartbeat by analyzing pixel intensity variations in the filtered video over time.
+ + :param video_frames: Processed video frames (filtered and magnified) + :param fps: Frames per second of the video + :return: Detected heart rate in BPM (beats per minute) + ''' + # Focus on the green channel for heart rate detection (more sensitive to blood flow changes) + green_channel = video_frames[:, :, :, 1] # Extract green channel + + # Calculate the average intensity of the green channel for each frame + avg_intensity = np.mean(green_channel, axis=(1, 2)) # Shape: (num_frames,) + + # Normalize intensity values + avg_intensity -= np.mean(avg_intensity) + avg_intensity /= np.std(avg_intensity) + + # Detect peaks in the intensity signal (peaks correspond to heartbeats) + peaks, _ = find_peaks(avg_intensity, distance=fps // 2) # Ensure at least half a second between peaks + + # Calculate the time differences between peaks to compute the heart rate + peak_intervals = np.diff(peaks) / fps # Convert frame intervals to seconds + + if len(peak_intervals) > 0: + avg_heartbeat_interval = np.mean(peak_intervals) + bpm = 60 / avg_heartbeat_interval # Convert to beats per minute + else: + bpm = 0 # No peaks detected + + return bpm diff --git a/main.py b/main.py new file mode 100755 index 0000000..6b6005a --- /dev/null +++ b/main.py @@ -0,0 +1,113 @@ +#.................................. +#........Visualisierung 2.......... +#.................................. +#...Eulerian Video Magnification... +#.................................. +#.. Author: Galya Pavlova.......... +#.................................. + + +import os +import sys +import cv2 +from PyQt5.QtCore import QTimer +from PyQt5.QtGui import QPixmap, QImage +from PyQt5.QtWidgets import QApplication, QDialog, QFileDialog +from PyQt5.uic import loadUi + +import butterworth_filter +import ideal_filter + + +class App(QDialog): + + def __init__(self): + ''' + Initializes and loads the GUI PyQt file + ''' + super(App, self).__init__() + self.vid = None + self.name = None + self.capture = None + self.len = None + self.l = 0 + loadUi('gui.ui', self) + self.startButton.clicked.connect(self.on_start_clicked) + self.lButton.clicked.connect(self.open_file) + self.playButton.clicked.connect(self.play_video) + + + def play_video(self): + ''' + Plays the processed output video + ''' + self.capture = cv2.VideoCapture(self.videoOut) + frame_rate = self.capture.get(cv2.CAP_PROP_FPS) + self.len = int(self.capture.get(cv2.CAP_PROP_FRAME_COUNT)) + + self.timer = QTimer(self) + self.timer.timeout.connect(self.displayImage) + self.timer.start(int(1000 / frame_rate)) # QTimer expects the interval in milliseconds, not the frame rate + + def displayImage(self): + ''' + Reads and displays the next video frame + ''' + self.l += 1 + + if self.l >= self.len: + self.timer.stop() + self.timer.deleteLater() + self.l = 0 + + ret, img = self.capture.read() + if not ret: # Guard against failed reads at the end of the stream + return + qformat = QImage.Format_RGB888 + + outImage = QImage(img, img.shape[1], img.shape[0], qformat) + outImage = outImage.rgbSwapped() + self.video.setPixmap(QPixmap.fromImage(outImage)) + + def open_file(self): + ''' + Opens a file dialog and stores the selected video + ''' + filename, _ = QFileDialog.getOpenFileName(self, 'Open Video File', '../', 'All Files(*)') + if filename: + self.vid = filename + base = os.path.basename(filename) + self.name = os.path.splitext(base)[0] + self.nameLabel.setText(base) + + def on_start_clicked(self): + ''' + Reads the input from the GUI and uses the parameters to start the program + ''' + self.finished.clear() + QApplication.instance().processEvents() + + alpha = float(self.alpha.text()) + cutoff = float(self.cutoff.text()) + low = float(self.low.text()) + high = float(self.high.text())
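+ # The remaining widgets supply the chrominance attenuation, the attenuation mode and the filter choice below (mode 0 = Butterworth motion magnification, mode 1 = ideal bandpass color magnification)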
+ chromAttenuation = float(self.chromAtt.text()) + linearAttenuation = self.linearAtt.isChecked() + mode = self.comboBox.currentIndex() + + if mode == 0: + butterworth_filter.start(self.vid, alpha, cutoff, low, high, linearAttenuation, chromAttenuation, self.name) + elif mode == 1: + ideal_filter.start(self.vid, alpha, low, high, chromAttenuation, self.name) + + self.finished.setText('Done!') + + self.videoOut = self.name + "Out.avi" + + +if __name__ == "__main__": + app = QApplication(sys.argv) + window = App() + window.setWindowTitle('Eulerian Video Magnification') + window.show() + sys.exit(app.exec_()) + diff --git a/main_showcase.py b/main_showcase.py new file mode 100644 index 0000000..8ae87da --- /dev/null +++ b/main_showcase.py @@ -0,0 +1,398 @@ +import sys +import cv2 +import numpy as np +from PyQt5.QtWidgets import ( + QApplication, QWidget, QFormLayout, QPushButton, QLabel, QHBoxLayout, QVBoxLayout, QComboBox +) +from PyQt5.QtGui import QImage, QPixmap +from PyQt5.QtCore import QTimer, Qt, QThread, pyqtSignal, QElapsedTimer + +import ideal_filter # Ensure this is properly implemented +import butterworth_filter # Ensure this is properly implemented + + +class CameraThread(QThread): + frame_ready = pyqtSignal(np.ndarray) + + def __init__(self): + super().__init__() + self.is_running = True + self.cap = cv2.VideoCapture(0) + self.fps = self.cap.get(cv2.CAP_PROP_FPS) or 30 + + def run(self): + while self.is_running: + ret, frame = self.cap.read() + if ret: + self.frame_ready.emit(frame) + else: + print("Error: Could not read frame from camera.") + self.msleep(int(1000 / self.fps)) + + def stop(self): + self.is_running = False + self.cap.release() + + +class FilterWorker(QThread): + result_ready = pyqtSignal(np.ndarray, float) + + def __init__(self, buffer, alpha, chromAttenuation, fps, filter_type="Ideal", width=512, height=512, time_window=5): + super().__init__() + self.buffer = buffer + self.alpha = alpha + self.chromAttenuation = chromAttenuation + self.fps = fps + self.width = width + self.height = height + self.time_window = time_window + self.is_running = True + self.filter_type = filter_type + self.low, self.high = (1, 2.5) if filter_type == "Ideal" else (0.1, 0.5) + + def run(self): + if self.filter_type == "Ideal": + final_video, bpm = ideal_filter.start( + vidFile=self.buffer, + alpha=self.alpha, + low=self.low, + high=self.high, + chromAttenuation=self.chromAttenuation, + fps=self.fps, + width=self.width, height=self.height + ) + + elif self.filter_type == "Butterworth": + final_video, bpm = butterworth_filter.start( + video_frames=self.buffer, + alpha=self.alpha, + low=self.low, + high=self.high, + chromAttenuation=self.chromAttenuation, + fps=self.fps, + width=self.width, + height=self.height, + time_window=self.time_window + ) + if self.is_running: + self.result_ready.emit(final_video, bpm) + + def stop(self): + self.is_running = False + +class ParameterGUI(QWidget): + def __init__(self): + super().__init__() + self.setWindowTitle('Video Filtering Display') + self.setFixedSize(1400, 800) + self.setup_ui() + + modelFile = "res10_300x300_ssd_iter_140000_fp16.caffemodel" + configFile = "deploy.prototxt" + self.face_net = cv2.dnn.readNetFromCaffe(configFile, modelFile) + + self.face_buffer = [] + self.video_buffer = [] + self.buffer_length = 0 + + self.elapsed_timer = QElapsedTimer() + self.is_processing = False + self.worker = None + self.camera_thread = None + + + def setup_ui(self): + layout = QVBoxLayout() + + # ComboBoxes for user parameters + self.alphaMenu
= QComboBox(self) + alpha_values = [5, 10, 15, 20, 30, 40, 50, 60] + self.alphaMenu.addItems([str(value) for value in alpha_values]) + + self.chromAtt = QComboBox(self) + chrom_values = [0.0001, 0.001,0.01,0.1,0.5] + self.chromAtt.addItems([str(value) for value in chrom_values]) + + self.timeWindowMenu = QComboBox(self) + self.timeWindowMenu.addItems(["5", "10", "15", "20"]) + + self.filterMenu = QComboBox(self) + self.filterMenu.addItems(["Ideal", "Butterworth"]) + + # Form layout for parameters + form_layout = QFormLayout() + form_layout.addRow("Alpha:", self.alphaMenu) + form_layout.addRow("ChromAttenuation:", self.chromAtt) + form_layout.addRow("Filter:", self.filterMenu) + form_layout.addRow("Time Window (seconds):", self.timeWindowMenu) + + self.submitButton = QPushButton('Start Camera') + self.submitButton.clicked.connect(self.start_camera) + form_layout.addRow(self.submitButton) + layout.addLayout(form_layout) + + # Layout for displaying video + video_layout = QHBoxLayout() + self.liveVideoLabel = QLabel(self) + self.liveVideoLabel.setFixedSize(640, 480) + self.processedVideoLabel = QLabel(self) + self.processedVideoLabel.setFixedSize(640, 480) + + video_layout.addWidget(self.liveVideoLabel, alignment=Qt.AlignCenter) + video_layout.addWidget(self.processedVideoLabel, alignment=Qt.AlignCenter) + layout.addLayout(video_layout) + + # BPM and status labels + self.bpmLabel = QLabel('BPM: ', self) + layout.addWidget(self.bpmLabel) + + self.bufferStatusLabel = QLabel('Buffer status: Waiting...', self) + layout.addWidget(self.bufferStatusLabel) + + self.filterStatusLabel = QLabel('Filter status: Not running', self) + layout.addWidget(self.filterStatusLabel) + + self.ParameterStatusLabel = QLabel('No parameters set', self) + layout.addWidget(self.ParameterStatusLabel) + + self.setLayout(layout) + + def start_camera(self): + # Stop existing camera thread if it's running + if self.camera_thread is not None: + self.camera_thread.stop() + self.camera_thread.wait() + + # Stop existing worker thread if it's running + if self.worker is not None: + self.worker.stop() + self.worker.wait() + + # Stop any existing timer for video display + if not hasattr(self, 'timer'): + self.timer = QTimer(self) + if self.timer.isActive(): + self.timer.stop() # Stop any running timer before starting new camera session + + # Reset buffers and status labels + self.face_buffer.clear() + self.video_buffer.clear() + self.is_processing = False + self.bufferStatusLabel.setText('Buffer status: Waiting...') + self.filterStatusLabel.setText('Filter status: Not running') + self.bpmLabel.setText('BPM: ') + + # Fetch parameters from UI + self.alpha = int(self.alphaMenu.currentText()) + self.chromAttenuation = float(self.chromAtt.currentText()) + self.filter = str(self.filterMenu.currentText()) + self.timeWindow = int(self.timeWindowMenu.currentText()) + + # Update the parameter status label + self.ParameterStatusLabel.setText(f'Alpha: {self.alpha} ChromAttenuation: {self.chromAttenuation} TimeWindow: {self.timeWindow}') + + # Start the camera thread + self.camera_thread = CameraThread() # Initialize the new camera thread + self.camera_thread.frame_ready.connect(self.update_frame) + self.camera_thread.start() + + # Set FPS and buffer length based on the camera's FPS + self.fps = self.camera_thread.fps + self.buffer_length = int(self.camera_thread.fps * self.timeWindow) + + # Start the elapsed timer to measure buffering time + self.elapsed_timer.start() + + + def update_frame(self, frame): + if not self.is_processing: + 
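# Frames are collected in the buffer for timeWindow seconds; once the elapsed timer fires, process_buffers() runs the selected filter on the whole batch +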
self.bufferStatusLabel.setText('Buffer status: Filling up') + + if self.filter == "Butterworth": + upper_body_region, coords = self.get_upper_body(frame) + if upper_body_region is not None: + upper_body_resized = cv2.resize(upper_body_region, (512, 512)) + self.video_buffer.append(upper_body_resized) + startX, startY, endX, endY = coords + cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2) + + elif self.filter == "Ideal": + face_region = self.get_face(frame) + if face_region is not None: + face_region_resized = cv2.resize(face_region, (512, 512)) + + # "Weißabgleich" (white balance): blur heavily to suppress spatial noise before buffering + face_region_resized = cv2.GaussianBlur(face_region_resized, (25, 25), 0) + face_region_resized = cv2.medianBlur(face_region_resized, 25) + + self.face_buffer.append(face_region_resized) + + if self.elapsed_timer.elapsed() >= self.timeWindow * 1000: + self.process_buffers() + self.elapsed_timer.restart() + + + # Display the live frame + frame_display = self.resize_frame(frame, self.liveVideoLabel) + frame_display = cv2.cvtColor(frame_display, cv2.COLOR_BGR2RGB) + height, width, channel = frame_display.shape + bytes_per_line = channel * width + q_img = QImage(frame_display.data, width, height, bytes_per_line, QImage.Format_RGB888) + self.liveVideoLabel.setPixmap(QPixmap.fromImage(q_img)) + + + def get_face(self, frame): + (h, w) = frame.shape[:2] + blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0)) + self.face_net.setInput(blob) + detections = self.face_net.forward() + + for i in range(0, detections.shape[2]): + confidence = detections[0, 0, i, 2] + if confidence > 0.5: + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + face_region = frame[startY:endY, startX:endX] + cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2) + return face_region + return None + + + def get_upper_body(self, frame): + (h, w) = frame.shape[:2] + startY = int(h / 2) + endY = h + startX = int(w / 4) + endX = int(w * 3 / 4) + + cropped_frame = frame[startY:endY, startX:endX] + return cropped_frame, (startX, startY, endX, endY) + + def process_buffers(self): + if self.is_processing: + return + + self.is_processing = True + self.bufferStatusLabel.setText('Buffer status: Completed') + self.filterStatusLabel.setText('Filter status: Running') + + time_window = int(self.timeWindowMenu.currentText()) + + if self.filter == "Ideal" and self.face_buffer: + self.worker = FilterWorker( + self.face_buffer.copy(), # Copy buffer before clearing + self.alpha, + self.chromAttenuation, + self.camera_thread.fps, + filter_type="Ideal", + time_window=time_window + ) + self.worker.result_ready.connect(self.display_filtered_video) + self.worker.start() + + self.face_buffer.clear() + + elif self.filter == "Butterworth" and self.video_buffer: + self.worker = FilterWorker( + self.video_buffer.copy(), # Copy buffer before clearing + self.alpha, + self.chromAttenuation, + self.camera_thread.fps, + filter_type="Butterworth", + time_window=time_window + ) + self.worker.result_ready.connect(self.display_filtered_video) + self.worker.start() + + # Clear the buffer after starting the filter worker + self.video_buffer.clear() + + def display_filtered_video(self, final_video, bpm): + self.bpmLabel.setText(f'BPM: {bpm:.2f}') + + self.filterStatusLabel.setText('Filter status: Displaying video') + self.frame_index = 0 + self.final_video = final_video + + # Stop the existing timer (if any) and set up a new timer for frame display + if hasattr(self, 
'frame_timer'): + self.frame_timer.stop() + + self.frame_timer = QTimer(self) + self.frame_timer.timeout.connect(lambda: self.show_filtered_frame(self.final_video)) + self.frame_timer.start(int(1000 / self.fps)) # Display frames based on FPS + + def show_filtered_frame(self, final_video): + """Displays each frame from the filtered video using a QTimer.""" + if self.frame_index < len(final_video): + frame = final_video[self.frame_index] + + if frame.dtype == np.float64: + frame = cv2.normalize(frame, None, 0, 255, cv2.NORM_MINMAX) + frame = frame.astype(np.uint8) + + # Resize and display the filtered frame + frame_resized = self.resize_frame(frame, self.processedVideoLabel) + frame_resized = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2RGB) + height, width, channel = frame_resized.shape + bytes_per_line = channel * width + q_img = QImage(frame_resized.data, width, height, bytes_per_line, QImage.Format_RGB888) + + self.processedVideoLabel.setPixmap(QPixmap.fromImage(q_img)) + QApplication.processEvents() + + self.frame_index += 1 + + else: + # Stop the filtered video display timer + self.frame_timer.stop() + + # Restart the live video feed + if hasattr(self, 'timer'): + self.timer.start(int(1000 / self.fps)) # Restart the live feed timer + else: + print("Error: Timer for live video is not initialized.") + self.filterStatusLabel.setText('Filter status: Completed') + self.is_processing = False + self.bufferStatusLabel.setText('Buffer status: Blocked') + + def resize_frame(self, frame, label): + size = label.size() + return cv2.resize(frame, (size.width(), size.height())) + + def closeEvent(self, event): + if self.camera_thread: + self.camera_thread.stop() + if self.worker: + self.worker.stop() + if hasattr(self, 'frame_timer') and self.frame_timer: # frame_timer only exists after a video has been displayed + self.frame_timer.stop() + event.accept() + + +if __name__ == '__main__': + app = QApplication(sys.argv) + window = ParameterGUI() + window.show() + sys.exit(app.exec_()) + + + for i in range(frame_count): + ret, frame = cap.read() + if not ret: + print(f"Frame {i+1} could not be read.") + break + frames[i] = frame + + # Optional: show a preview + cv2.imshow("Recording", frame) + if cv2.waitKey(1) & 0xFF == ord('q'): # quit by pressing 'q' + break + + # Release the camera and close all windows + cap.release() + cv2.destroyAllWindows() \ No newline at end of file diff --git a/mobilenet_iter_73000.caffemodel b/mobilenet_iter_73000.caffemodel new file mode 100644 index 0000000..253e501 Binary files /dev/null and b/mobilenet_iter_73000.caffemodel differ diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..dadcf5b --- /dev/null +++ b/parser.py @@ -0,0 +1,76 @@ +import configparser + +# Parser class for INI files +class ParameterParser: + def __init__(self, config_file='config.ini'): + self.config_file = config_file + self.config = configparser.ConfigParser() + self.config.read(self.config_file) + + # Ensure 'Parameters' and 'Video' sections exist + if 'Parameters' not in self.config: + self.config['Parameters'] = {} + if 'Video' not in self.config: + self.config['Video'] = {} + + def get_parameters(self): + # Read the parameters from the configuration file + try: + alpha = self.config.getfloat('Parameters', 'alpha') + cutoff = self.config.getfloat('Parameters', 'cutoff') + low = self.config.getfloat('Parameters', 'low') + high = self.config.getfloat('Parameters', 'high') + chromAttenuation = self.config.getfloat('Parameters', 'chromAttenuation') + mode = self.config.getint('Parameters', 'mode') + + # Read video parameters + 
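# The fallback values below apply when the INI file has no [Video] section or a key is missing +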
width = self.config.getint('Video', 'width', fallback=1280) + height = self.config.getint('Video', 'height', fallback=720) + fps = self.config.getint('Video', 'fps', fallback=30) + + return { + "alpha": alpha, + "cutoff": cutoff, + "low": low, + "high": high, + "chromAttenuation": chromAttenuation, + "mode": mode, + "width": width, + "height": height, + "fps": fps + } + except Exception as e: + print(f"Error reading config: {e}") + return None + + def set_parameters(self, alpha, cutoff, low, high, chromAttenuation, mode, width, height, fps): + # Write the parameters to the configuration file + self.config['Parameters'] = { + 'alpha': str(alpha), + 'cutoff': str(cutoff), + 'low': str(low), + 'high': str(high), + 'chromAttenuation': str(chromAttenuation), + 'mode': str(mode) + } + + # Save video parameters + self.config['Video'] = { + 'width': str(width), + 'height': str(height), + 'fps': str(fps) + } + + with open(self.config_file, 'w') as configfile: + self.config.write(configfile) + +# Example: create and save parameters +if __name__ == "__main__": + parser = ParameterParser() + + # Save example values + parser.set_parameters(0.5, 1.0, 0.2, 1.5, 0.8, 1, 1280, 720, 30) + + # Read the parameters back + parameters = parser.get_parameters() + print(parameters) diff --git a/pyramid.py b/pyramid.py new file mode 100644 index 0000000..0094e48 --- /dev/null +++ b/pyramid.py @@ -0,0 +1,108 @@ +#.................................. +#........Visualisierung 2.......... +#.................................. +#...Eulerian Video Magnification... +#.................................. +#.. Author: Galya Pavlova.......... +#.................................. + +import cv2 +import numpy as np + + +def create_gaussian_pyramid(image, levels): + ''' + Creates a Gaussian pyramid for a single image. + :param image: An image, i.e. a video frame + :param levels: The number of Gaussian pyramid levels + :return: A list of images, one per pyramid level + ''' + gauss = image.copy() + gauss_pyr = [gauss] + + for level in range(1, levels): + gauss = cv2.pyrDown(gauss) + gauss_pyr.append(gauss) + + return gauss_pyr + + +def gaussian_video(video_tensor, levels): + ''' + For a given video sequence the function creates a video built from + the highest Gaussian pyramid level (specified by levels) + :param video_tensor: Video sequence + :param levels: Specifies the Gaussian pyramid levels + :return: A video sequence where each frame is the downsampled version of the original frame + ''' + for i in range(0, video_tensor.shape[0]): + frame = video_tensor[i] + pyr = create_gaussian_pyramid(frame, levels) + gaussian_frame = pyr[-1] # use only the highest Gaussian level + if i == 0: # initialize one time + vid_data = np.zeros((video_tensor.shape[0], gaussian_frame.shape[0], gaussian_frame.shape[1], 3)) + + vid_data[i] = gaussian_frame + return vid_data + + +def create_laplacian_pyramid(image, levels): + ''' + Builds a Laplace pyramid for an image, i.e. video frame + :param image: Image, i.e. single video frame + :param levels: Specifies the Laplace pyramid levels + :return: A list of images, one per 
pyramid level + ''' + gauss_pyramid = create_gaussian_pyramid(image, levels) + laplace_pyramid = [] + for i in range(levels-1): + size = (gauss_pyramid[i].shape[1], gauss_pyramid[i].shape[0]) # reshape + laplace_pyramid.append(gauss_pyramid[i]-cv2.pyrUp(gauss_pyramid[i+1], dstsize=size)) + + laplace_pyramid.append(gauss_pyramid[-1]) # add last gauss pyramid level + return laplace_pyramid + + +def laplacian_video_pyramid(video_stack, levels): + ''' + Creates a Laplacian pyramid for the whole video sequence + :param video_stack: Video sequence + :param levels: Specifies the Laplace pyramid levels + :return: A two-dimensional array where the first index is used for the pyramid levels + and the second for each video frame + ''' + print('Build Laplace pyramid') + + # "2 dimensional" array - first index for pyramid level, second for frames + laplace_video_pyramid = [[0 for x in range(video_stack.shape[0])] for x in range(levels)] + + for i in range(video_stack.shape[0]): + frame = video_stack[i] + pyr = create_laplacian_pyramid(frame, levels) + + for n in range(levels): + laplace_video_pyramid[n][i] = pyr[n] + + return laplace_video_pyramid + + +def reconstruct(filtered_video, levels): + ''' + Reconstructs a video sequence from the filtered Laplace video pyramid + :param filtered_video: Two-dimensional video structure - first index pyramid level, second video frame + :param levels: pyramid levels + :return: video sequence + ''' + print('Reconstruct video') + + final = np.empty(filtered_video[0].shape) + for i in range(filtered_video[0].shape[0]): # iterate through frames + + up = filtered_video[-1][i] # highest level + for k in range(levels-1, 0, -1): # going down to lowest level + size = (filtered_video[k-1][i].shape[1], filtered_video[k-1][i].shape[0]) # reshape + up = cv2.pyrUp(up, dstsize=size) + filtered_video[k-1][i] + + final[i] = up + + return final diff --git a/res10_300x300_ssd_iter_140000_fp16.caffemodel b/res10_300x300_ssd_iter_140000_fp16.caffemodel new file mode 100644 index 0000000..0e9cd4a Binary files /dev/null and b/res10_300x300_ssd_iter_140000_fp16.caffemodel differ diff --git a/video.py b/video.py new file mode 100644 index 0000000..df43af0 --- /dev/null +++ b/video.py @@ -0,0 +1,43 @@ +import numpy as np +import cv2 + + +def calculate_pyramid_levels(vidWidth, vidHeight): + ''' + Calculates the maximal number of pyramid levels for the Laplacian pyramid + :param vidWidth: video frames' width + :param vidHeight: video frames' height + :return: the maximal number of pyramid levels + ''' + if vidWidth < vidHeight: + levels = int(np.log2(vidWidth)) + else: + levels = int(np.log2(vidHeight)) + + return levels + + +def rgb2yiq(video): + ''' + Converts the video color from RGB to YIQ (NTSC) + :param video: RGB video sequence + :return: YIQ-color video sequence + ''' + yiq_from_rgb = np.array([[0.299, 0.587, 0.114], + [0.596, -0.274, -0.322], + [0.211, -0.523, 0.312]]) + t = np.dot(video, yiq_from_rgb.T) + return t + + +def yiq2rgb(video): + ''' + Converts the video color from YIQ (NTSC) to RGB + :param video: YIQ-color video sequence + :return: RGB video sequence + ''' + rgb_from_yiq = np.array([[1, 0.956, 0.621], + [1, -0.272, -0.647], + [1, -1.106, 1.703]]) + t = np.dot(video, rgb_from_yiq.T) + return t
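
As a usage reference, a minimal driver for the ideal (color) filter could look like the sketch below. It is an illustrative assumption, not part of the repository: the clip path clip.avi, the ten-second cap and the concrete parameter values are made up, while ideal_filter.start and its signature come from ideal_filter.py above, and the 1.0-2.5 Hz passband (roughly 60-150 beats per minute) matches the "Ideal" defaults in main_showcase.py.

import cv2
import numpy as np

import ideal_filter

# Read a short clip into an RGB frame stack; ideal_filter.start expects RGB frames.
cap = cv2.VideoCapture("clip.avi")  # hypothetical input file
fps = cap.get(cv2.CAP_PROP_FPS) or 30
frames = []
while len(frames) < int(10 * fps):  # keep the demo to roughly ten seconds
    ret, frame = cap.read()
    if not ret:
        break
    frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # OpenCV reads BGR
cap.release()

if not frames:
    raise SystemExit("No frames could be read from clip.avi")

video_array = np.asarray(frames, dtype=np.float64)
height, width = video_array.shape[1], video_array.shape[2]

final, bpm = ideal_filter.start(video_array, alpha=50, low=1.0, high=2.5,
                                chromAttenuation=0.1, fps=fps,
                                width=width, height=height)
print(f"Estimated heart rate: {bpm:.1f} bpm")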