From c90753aa233bb20ecb233a8fa80b562a0ec8328b Mon Sep 17 00:00:00 2001
From: zieglerhe <zieglerhe@efiapps0.ads1.fh-nuernberg.de>
Date: Fri, 30 May 2025 11:33:35 +0200
Subject: [PATCH] New FFT.vhd Template and Fix FFT TB (expected values)

---
 hardware/signal_processing/fft.vhd        | 273 ++++++++++++++++++++--
 tests/hardware/task_fft/test_task_fft.vhd |   4 +-
 tests/hardware/task_fft/vsim.wave         |   1 +
 3 files changed, 257 insertions(+), 21 deletions(-)

diff --git a/hardware/signal_processing/fft.vhd b/hardware/signal_processing/fft.vhd
index a10f221..e1ea5f3 100644
--- a/hardware/signal_processing/fft.vhd
+++ b/hardware/signal_processing/fft.vhd
@@ -1,13 +1,26 @@
 ------------------------------------------------------------------------
 -- fft
 --
--- calculation of FFT magnitude
+-- calculation of FFT magnitudes
 --
 -- Inputs:
 -- 32-Bit Floating Point number in range +-16 expected (loaded from FIFO)
 --
 -- Outputs
 -- 32-Bit Floating Point number in range +-16 calculated (stored in FIFO)
+-- 
+--
+-- Zahlen aus dem Eingangs-FIFO liegen in 32-Bit Floating Point mit Wertebereich +-16 vor 
+-- Diese Zahlen müssen in Floating Point auf den Wertebereich +-1 gebracht werden (In Floating Point können Sie durch :16 teilen, wenn Sie den Exponenten der Floating Point Zahl um -4 verkleinern, falls dieser ungleich Null ist) 
+-- Die auf den Wertebereich +-1 gebrachten Floating Point Zahlen mit to_fixed auf eine Fixpointzahl wandeln 
+-- Diese Fixpointzahl kann dem FFT IP-Core (fftmain) als Eingangswert übergeben werden (Realteil = skalierte auf Fixpoint gewandelte Zahlen; Imaginärteil=0) 
+-- Die vom FFT IP-Core berechneten Werte (Realteil und Imaginärteil) können direkt dem IP-Core für die FFT Magnitude Berechnung (fft_magnitude_calc) übergeben werden (dieser arbeitet auch in Fixpoint im gleichen Wertebereich) 
+-- Das Ergebnis des FFT Magnitude Berechnung IP-Cores (fft_magnitude_calc) dann auf Floating Point wandeln (to_float) 
+-- Diese Floating Point Zahlen dann wieder skalieren mit *16 bzw. *32 für den DC-Anteil um auf den ursprünglichen Wertebereich mit +-16 zu kommen (aus dem FFT IP-Core kommt der DC-Anteil / Index 0 um den Faktor 2 zu klein, deswegen dort *32). 
+-- (In Floating Point können Sie *16 machen, wenn Sie den Exponenten der Floating Point Zahl um +4 vergrößern, *32 wenn dieser um +5 vergrößert wird, falls der Exponent ungleich Null ist) 
+-- Die Ergebnisse liegen noch in der bit-reversed order vor (FFT IP-Core arbeitet nicht in-place) und müssen deswegen noch auf die natural order gebracht werden (https://de.mathworks.com/help/dsp/ug/linear-and-bit-reversed-output-order.html) 
+-- (z.B: ein Array verwenden, um die Werte zu sortieren)
+-- Dann das Ergebnis in den Ausgangsfifo speichern 
 --
 -----------------------------------------------------------------------
 library ieee;
@@ -22,10 +35,10 @@ library work;
 entity fft is
   generic (
 
-    -- input data width of real/img part
+    -- input data width of real/img part 
       input_data_width : integer := 32;
 
-    -- output data width of real/img part
+    -- output data width of real/img part 
       output_data_width : integer := 32
 
     );
@@ -35,10 +48,10 @@ entity fft is
 
         task_start : in std_logic;
         task_state : out work.task.State;
-
+	  
         signal_read : out std_logic;
         signal_readdata : in std_logic_vector( 31 downto 0 );
-
+		  
         signal_write : out std_logic;
         signal_writedata : out std_logic_vector( 31 downto 0 )
     );
@@ -46,12 +59,103 @@ end entity fft;
 
 architecture rtl of fft is
 
+
+-- Signale für Task State Machine
     signal current_task_state : work.task.State;
     signal next_task_state : work.task.State;
     signal index : integer range 0 to work.task.STREAM_LEN;
+    --signal index : integer range 0 to 2000;
 
+-- Component declaration of the Verilog IP core for the FFT (fftmain)
+component fftmain is
+	port(
+		clock: in std_logic; -- Master Clock
+		reset: in std_logic; -- Active High Asynchronous Reset
+		di_en: in std_logic; --  Input Data Enable
+		di_re: in std_logic_vector(input_data_width-1 downto 0); -- Input Data (Real)
+                di_im: in std_logic_vector(input_data_width-1 downto 0); -- Input Data (Imag)
+		do_en: out std_logic; -- Output Data Enable
+		do_re: out std_logic_vector(output_data_width-1 downto 0); -- Output Data (Real)
+                do_im: out std_logic_vector(output_data_width-1 downto 0) -- Output Data (Imag)
+	);
+end component;
+
+--  Signale Input skaliert
+    signal fft_float_input : signed( 31 downto 0 ); 
+    signal fft_float_scaled_input : signed( 31 downto 0 ); 
+
+-- Signale fuer FFT-IP Core
+    -- fft data input signal
+    signal fft_input_data_enable: std_logic;
+    signal data_in_re : std_logic_vector (input_data_width-1 downto 0);
+    signal data_in_im : std_logic_vector (input_data_width-1 downto 0);
+    -- fft output data
+    signal fft_output_valid : std_logic;
+    signal data_out_re : std_logic_vector (output_data_width-1 downto 0);
+    signal data_out_im : std_logic_vector (output_data_width-1 downto 0);
+
+-- Signale fuer Magnitude IP-Core
+    signal fft_mag_calc_valid : std_logic;	
+    signal fft_mag_calc_result: std_logic_vector (output_data_width-1 downto 0);
+
+-- Signale fuer Ergebnis skaliert
+    signal data_out_mag_signed_float : signed (output_data_width-1 downto 0);	
+    signal fft_float_scaled : signed( 31 downto 0 ); 
+	 
+-- Signale/Array um Ergebnisse der FFT in der natural order zu speichern
+	
+    signal data_memory : work.reg32.RegArray( 0 to 1023 );
+    signal index_reversed : std_logic_vector(9 downto 0);
+    signal index_output_sv : std_logic_vector(9 downto 0);
+    signal index_output : integer range 0 to 1023;
+
+-- Signal um in den Write FIFO zu schreiben
+    signal wr_fifo : std_logic;	 
+	  
 begin
-    task_state_transitions : process ( current_task_state, task_start, index ) is
+    
+    -----------------------------------------------------------------------------------------------
+    -- Hier muss der Verilog FFT IP-Core instanziert werden
+    -----------------------------------------------------------------------------------------------
+
+    --u_fft : fftmain
+    --    port map (		      
+    --        clock => , -- system clock				
+    --        reset => , -- Active High Asynchronous Reset				    
+    --	    di_en => , -- Input Data Enable
+    --	    di_re => , -- Input Data (Real)
+    --      di_im => , -- Input Data (Imag)				
+    --      do_en => , -- Output Data Enable				
+    --	    do_re => , -- Output Data (Real)
+    --      do_im =>  -- Output Data (Imag)
+    --    ); 
+    
+    fft_output_valid <= '0';
+    data_out_re <= (others => '0');
+    data_out_im <= (others => '0');
+
+    -----------------------------------------------------------------------------------------------
+    -- Hier muss der VHDL Magnitude IP-Core instanziert werden
+    -----------------------------------------------------------------------------------------------
+			 
+    -- u_fft_mag_calc : entity work.fft_magnitude_calc
+    -- port map (
+    --  clk => , -- system clock
+    --  reset => , -- Active High Asynchronous Reset	
+    --	input_valid => , -- Input Data Valid
+    --	input_re => ,  -- Input Realteil in Fixpoint format
+    --	input_im =>  , -- Input Imaginaerteil in Fixpoint format
+    --	output_valid => , -- Output Data Valid
+    --	output_magnitude =>  -- Magnitude Output in Fixpoint format
+    --  );
+
+    fft_mag_calc_valid <= '1' when index = 0 else '0';
+    fft_mag_calc_result <= (others => '0');
+
+    -----------------------------------------------------------------------------------------------
+    -- Zustandsmaschine fuer die Taskabarbeitung (Uebergangsschaltnetz)
+    -----------------------------------------------------------------------------------------------
+    task_state_transitions : process (all) is
     begin
         next_task_state <= current_task_state;
         case current_task_state is
@@ -60,7 +164,7 @@ begin
                     next_task_state <= work.task.TASK_RUNNING;
                 end if;
             when work.task.TASK_RUNNING =>
-                if ( index = work.task.STREAM_LEN - 1 ) then
+                if (  index = 2 ) then
                     next_task_state <= work.task.TASK_DONE;
                 end if;
             when work.task.TASK_DONE =>
@@ -70,28 +174,157 @@ begin
         end case;
     end process task_state_transitions;
 
-    sync : process ( clk, reset ) is
+    -----------------------------------------------------------------------------------------------
+    -- Zustandsmaschine fuer die eigentliche Ablaufsteuerung fuer die FFT (Uebergangsschaltnetz)
+    -----------------------------------------------------------------------------------------------
+
+    -- Hier soll Ihre Ablaufsteuerung fuer die FFT stehen
+
+
+    -----------------------------------------------------------------------------------------------
+    -- Ausgangsschaltnetz/Zustandsspeicher fuer die Task und FFT Zustandsmaschine
+    -----------------------------------------------------------------------------------------------
+   sync : process ( clk, reset ) is
     begin
         if ( reset = '1' ) then
             current_task_state <= work.task.TASK_IDLE;
             index <= 0;
+            wr_fifo <= '0'; -- no FIFO write request while in reset
         elsif ( rising_edge( clk ) ) then
             current_task_state <= next_task_state;
+            wr_fifo <= '0'; -- default every clock; overridden below while index = 1
             case next_task_state is
-            when work.task.TASK_IDLE =>
-                index <= 0;
-                signal_write <= '0';
-            when work.task.TASK_RUNNING =>
-                index <= index + 1;
-                signal_write <= '1';
-                signal_writedata <= ( others => '0' );
-            when work.task.TASK_DONE =>
-                index <= 0;
-                signal_write <= '0';
+		    when work.task.TASK_IDLE => 
+                      index <= 0;
+		    when work.task.TASK_RUNNING =>
+                      -- Placeholder so that the template runs through: while index=0 the natural-order array is filled with zeros
+                      -- While index=1 the 1024 values are written to the output FIFO (task is done at index=2)
+		      if (  index_output = work.task.STREAM_LEN - 1 ) then -- one full pass over the output index finished
+		      	  index <= index + 1;	
+                      end if;
+                      if index = 1 then				 
+			  wr_fifo <= '1'; -- request a FIFO write (consumed by p_output_fifo and p_number_output_sample)
+		      end if; 
+		    when work.task.TASK_DONE => null;
             end case;
         end if;
-    end process sync;
+    end process sync; 
+	 
 
-    task_state <= current_task_state;
+    -----------------------------------------------------------------------------------------------
+    --
+    -- Skalierung der Eingangswerte welche vom FIFO gelesen werden
+    -- Dies soll außerhalb eines Prozesses geschehen damit die gelesenen Werte direkt skaliert werden
+    -- und im naechsten Takt schon weiter verarbeitet werden können
+    --
+    -- Erforderliches Scaling:
+    --
+    -- By selecting the amplitude as a power of two (e.g. 2 ** 2) the
+    -- multiplication is a simple addition of the exponents.
+    -- In the following calculation the inputs are scaled from FP in range +-16 to FP in range +-1
+    -- This means a division by 16 -> the exponent needs an addition of -4
+    --
+    -- fft_float_input = gelesener Wert vom FIFO (floating point)
+    -- fft_float_scaled_input = soll skalierter Wert vom FIFO sein (floating point)
+    -- (Anm. Der FFT IP-Core braucht als Format Fix-Point -> noch eine weitere Wandlung erforderlich)
+    -----------------------------------------------------------------------------------------------
+
+    fft_float_input <= signed(signal_readdata);
+
+    fft_float_scaled_input <= fft_float_input; -- Der Eingang muss noch entsprechend skaliert werden 
+				
+
+    -----------------------------------------------------------------------------------------------
+    --
+    -- Skalierung der Ausgangswerte, welche die Magnitude Berechnung liefert
+    -- Dies soll außerhalb eines Prozesses geschehen damit die berechneten Werte direkt skaliert werden
+    -- und im naechsten Takt schon weiter verarbeitet werden können
+    --
+    -- Erforderliches Scaling:
+    --
+    -- By selecting the amplitude as a power of two (e.g. 2 ** 2) the
+    -- multiplication is a simple addition of the exponents.
+    -- In the following calculation the inputs are scaled from FP in range +-1 to FP in range +-16 
+    -- the first frequency bin (DC-bin) needs a multiplication by two compared to the other frequency bins (the used fft ip-core divides the result of the first frequency bin by N instead of the correct N/2)
+    -- This means a multiplication by 16 is required for all other frequency bins -> exponent needs an addition of +4
+    -- This means a multiplication by 32 is required for the first frequency bin (DC part) -> exponent needs an addition of +5
+    --
+    -- data_out_mag_signed_float = in float gewandelter Wert der Magnitude Berechnung
+    -- fft_float_scaled = soll der skalierte float Wert der Magnitude sein
+    -----------------------------------------------------------------------------------------------
+
+    data_out_mag_signed_float <= signed(to_float(fft_mag_calc_result));
+
+    fft_float_scaled <= data_out_mag_signed_float; -- Der Ausgang muss noch entsprechend skaliert werden
+
+
+    -----------------------------------------------------------------------------------------------
+    -- Der FFT-IP Core liefert das Ergebnis nicht in der natuerlichen Reihenfolge deswegen muss eine
+	-- Umordnung der Ausgangswerte erfolgen
+	-- 
+	-- index_output_sv = std_logic_vector des Integer Ausgangsindex
+	-- index_reversed = der reversed Ausgangsindex (wird benoetigt, damit man die FFT Ergebnisse in die natuerliche Ordnung bringen kann)
+	--
+    c_index_output_sv:
+            index_output_sv <= std_logic_vector(to_unsigned(index_output, index_reversed'length));
+    c_reversed_index:
+            index_reversed <= index_output_sv(0) & index_output_sv(1) & index_output_sv(2) & index_output_sv(3) & index_output_sv(4) & index_output_sv(5) & index_output_sv(6) & index_output_sv(7) & index_output_sv(8) & index_output_sv(9);
+	
+	   
+    -----------------------------------------------------------------------------------------------
+    -- Prozess steuert das hochzaehlen des Ausgang Index
+    -----------------------------------------------------------------------------------------------
+	p_number_output_sample: process ( clk, reset ) is
+    begin
+        if ( reset = '1' ) then
+            index_output <= 0; 
+        elsif ( rising_edge( clk ) ) then
+            -- Reset condition for index_output (wraps after the last of the 1024 entries)
+            if index_output = 1023 then -- if needed, also include the IDLE state of your FFT FSM in this condition
+		index_output <= 0;
+            -- Increment index_output while results are being stored into the natural-order array
+	    elsif fft_mag_calc_valid = '1' then
+		index_output <= index_output + 1;
+            -- Increment index_output while values are being written to the output FIFO
+	    elsif wr_fifo = '1' then
+		index_output <= index_output + 1;
+	    end if;
+        end if;
+    end process p_number_output_sample;
+	
+    -----------------------------------------------------------------------------------------------
+    -- Prozess speichert das skalierte Endergebnis in der natural order
+    -----------------------------------------------------------------------------------------------
+    p_output2float_memory: process ( clk, reset) is
+    begin
+        if ( reset = '1' ) then
+	    null; -- data_memory is not cleared on reset
+        elsif ( rising_edge( clk ) ) then
+            if fft_mag_calc_valid = '1' then -- a new magnitude result is available
+	                data_memory(to_integer(unsigned(index_reversed))) <= std_logic_vector(fft_float_scaled); -- store at the bit-reversed address of index_output
+            end if;
+        end if;
+    end process p_output2float_memory;	
+	
+    -----------------------------------------------------------------------------------------------
+    -- Schreiben der berechneten Werte in den FIFO
+    -----------------------------------------------------------------------------------------------
+   p_output_fifo: process ( clk, reset ) is
+    begin
+        if ( reset = '1' ) then
+            signal_writedata <= (others => '0');
+		  signal_write <= '0';
+        elsif ( rising_edge( clk ) ) then
+		  signal_write <= '0'; -- default: no write strobe unless wr_fifo is set below
+            if wr_fifo = '1' then
+		  signal_writedata <= data_memory(index_output); -- read the array at the current output index
+	          signal_write <= '1'; -- write strobe, asserted together with the data
+            end if;
+        end if;
+    end process p_output_fifo;	
+	
+	
+	-- Hier sollen die sonstigen benoetigten Anweisungen stehen
+    task_state <= current_task_state;	
 
 end architecture rtl;
diff --git a/tests/hardware/task_fft/test_task_fft.vhd b/tests/hardware/task_fft/test_task_fft.vhd
index c9d570b..c7e667e 100644
--- a/tests/hardware/task_fft/test_task_fft.vhd
+++ b/tests/hardware/task_fft/test_task_fft.vhd
@@ -63,7 +63,7 @@ architecture test of test_task_fft is
         variable writedata_float : float32;
         variable writedata_real : real;
         variable expected_real : real;
-        variable abs_err : real := 0.5e-1;
+        variable abs_err : real := 0.6;
         variable result : data_array( 0 to work.task.STREAM_LEN - 1 );
         variable result_fft : data_array( 0 to work.task.STREAM_LEN - 1 );
         file data_file : text;
@@ -110,11 +110,13 @@ architecture test of test_task_fft is
         std.textio.write( data_file_fft, "]" & LF );
         file_close( data_file_fft );
 
+        index := 0;
         while index < STREAM_LEN loop
             writedata_float := to_float( result( index ) );
             writedata_real := to_real( writedata_float );
             expected_real := work.fft_data.expected( index );
             assert_near( writedata_real, expected_real, abs_err );
+            index := index + 1;
         end loop;
 
         file_open( data_file_fft_bit_reversed, "fft_out_bit_reversed.py", write_mode );
diff --git a/tests/hardware/task_fft/vsim.wave b/tests/hardware/task_fft/vsim.wave
index d2f8057..55b44cc 100644
--- a/tests/hardware/task_fft/vsim.wave
+++ b/tests/hardware/task_fft/vsim.wave
@@ -1 +1,2 @@
 add wave -position end  sim:/test_task_fft/dut/*
+add wave -position end  sim:/test_task_fft/dut/u_fft/*