------------------------------------------------------------------------
-- fft
--
-- calculation of FFT magnitudes
--
-- Inputs:
-- 32-Bit Floating Point number in range +-16 expected (loaded from FIFO)
--
-- Outputs
-- 32-Bit Floating Point number in range +-16 calculated (stored in FIFO)
-- 
--
-- The values from the input FIFO are 32-bit floating-point numbers in the range +-16.
-- These values must first be scaled, still in floating point, to the range +-1 (a floating-point number is divided by 16 by decreasing its exponent by 4, provided the exponent is non-zero).
-- The values scaled to +-1 are then converted to a fixed-point number with to_fixed.
-- This fixed-point number can be passed to the FFT IP core (fftmain) as input (real part = scaled value converted to fixed point; imaginary part = 0).
-- The values computed by the FFT IP core (real and imaginary part) can be passed directly to the IP core for the FFT magnitude calculation (fft_magnitude_calc), which also works in fixed point with the same value range.
-- The result of the magnitude IP core (fft_magnitude_calc) is then converted back to floating point (to_float).
-- These floating-point numbers are then scaled by *16, or by *32 for the DC component, to get back to the original range of +-16 (the FFT IP core delivers the DC component / index 0 too small by a factor of 2, hence *32 there).
-- (In floating point, *16 is done by increasing the exponent by 4 and *32 by increasing it by 5, provided the exponent is non-zero.)
-- The results are still in bit-reversed order (the FFT IP core does not work in-place) and must therefore be brought into natural order (https://de.mathworks.com/help/dsp/ug/linear-and-bit-reversed-output-order.html)
-- (e.g. by using an array to sort the values).
-- Finally the result is stored in the output FIFO.
--
-----------------------------------------------------------------------
library ieee;
    use ieee.std_logic_1164.all;
    use ieee.numeric_std.all;

library work;
    use work.reg32.all;
    use work.task.all;
	 use work.float.all;

entity fft is
    generic (
        -- Bit width of the real/imaginary data fed into the FFT core
        input_data_width  : integer := 32;
        -- Bit width of the real/imaginary data delivered by the FFT core
        output_data_width : integer := 32
    );
    port (
        clk              : in  std_logic;
        reset            : in  std_logic;  -- active high, used asynchronously

        -- Task handshake
        task_start       : in  std_logic;
        task_state       : out work.task.State;

        -- Read side: strobe and the 32-bit word delivered for it
        signal_read      : out std_logic;
        signal_readdata  : in  std_logic_vector(31 downto 0);

        -- Write side: strobe and the 32-bit result word
        signal_write     : out std_logic;
        signal_writedata : out std_logic_vector(31 downto 0)
    );
end entity fft;

architecture rtl of fft is


-- Task state machine: registered state and combinational next state
    signal current_task_state : work.task.State;
    signal next_task_state    : work.task.State;

    -- Phase counter of the running task:
    --   0 = FFT results are being collected, 1 = results are written out,
    --   2 = task is finished
    signal index : integer range 0 to work.task.STREAM_LEN;

    -- FFT sequencing state machine: registered state and combinational next state
    type fft_state_type is (FFT_IDLE, FFT_READ, FFT_WAIT, FFT_WRITE);

    signal fft_state      : fft_state_type;
    signal fft_next_state : fft_state_type;

	-- Multiplies a 32-bit floating-point bit pattern by 2**delta by adding
	-- delta to its exponent field (bits 30..23). Sign and mantissa bits are
	-- passed through unchanged.
	--
	--   arg   : floating-point word, expected to be indexed 31 downto 0
	--   delta : power of two to apply (e.g. -4 for /16, +4 for *16);
	--           must fit into 8 signed bits
	--
	-- A word whose exponent field is zero (the value 0.0) is returned
	-- unchanged. Adding delta to it would turn an exact zero into a
	-- non-zero number: a very large one for negative delta, because the
	-- 8-bit field wraps around.
	--
	-- No saturation is performed for non-zero exponents: the addition is
	-- modulo 256, exactly as before.
	function scale_exponent(arg : signed; delta : integer) return signed is
	    variable result   : signed(arg'range);
	    variable exponent : unsigned(7 downto 0);
	begin
	    result   := arg;
	    exponent := unsigned(arg(30 downto 23));

	    -- Only scale when the exponent field is non-zero
	    if exponent /= 0 then
	        result(30 downto 23) := signed(exponent + unsigned(to_signed(delta, 8)));
	    end if;

	    return result;
	end function;

	-- Returns the exponent offset used to rescale the magnitude of output
	-- sample number idx: 6 for sample 0 (the DC bin), 5 for every other
	-- sample.
	-- NOTE(review): the file header asks for +5 (DC) and +4 (other bins).
	-- The extra +1 here was marked as intentional by the original author
	-- (x64 instead of x32, x32 instead of x16) - confirm against the
	-- scaling of fft_magnitude_calc / to_float.
	function scale_value(idx : integer) return integer is
	    variable exponent_offset : integer := 5;  -- all non-DC bins: x32
	begin
	    if idx = 0 then
	        exponent_offset := 6;                 -- DC bin: x64
	    end if;
	    return exponent_offset;
	end function;


    -- Counter for the samples read so far: cleared while the FFT state machine
    -- is in FFT_IDLE and incremented on every clock cycle spent in FFT_READ
    signal sample_cnt : integer range 0 to work.task.STREAM_LEN;

-- Component declaration of the Verilog FFT IP core
component fftmain is
    port (
        clock : in  std_logic;                                         -- master clock
        reset : in  std_logic;                                         -- active-high asynchronous reset
        di_en : in  std_logic;                                         -- input data enable
        di_re : in  std_logic_vector(input_data_width - 1 downto 0);   -- input data (real)
        di_im : in  std_logic_vector(input_data_width - 1 downto 0);   -- input data (imaginary)
        do_en : out std_logic;                                         -- output data enable
        do_re : out std_logic_vector(output_data_width - 1 downto 0);  -- output data (real)
        do_im : out std_logic_vector(output_data_width - 1 downto 0)   -- output data (imaginary)
    );
end component;

-- Input sample as floating-point bit pattern: raw word and exponent-scaled word
    signal fft_float_input        : signed(31 downto 0);
    signal fft_float_scaled_input : signed(31 downto 0);

-- FFT IP core interface
    -- input side
    signal fft_input_data_enable : std_logic;
    signal data_in_re            : std_logic_vector(input_data_width - 1 downto 0);
    signal data_in_im            : std_logic_vector(input_data_width - 1 downto 0);
    -- output side
    signal fft_output_valid      : std_logic;
    signal data_out_re           : std_logic_vector(output_data_width - 1 downto 0);
    signal data_out_im           : std_logic_vector(output_data_width - 1 downto 0);

-- Magnitude IP core outputs
    signal fft_mag_calc_valid  : std_logic;
    signal fft_mag_calc_result : std_logic_vector(output_data_width - 1 downto 0);

-- Magnitude converted to float, before and after exponent scaling
    signal data_out_mag_signed_float : signed(output_data_width - 1 downto 0);
    signal fft_float_scaled          : signed(31 downto 0);

-- Array and indices used to store the FFT results in natural order
    signal data_memory     : work.reg32.RegArray(0 to 1023);
    signal index_output    : integer range 0 to 1023;
    signal index_output_sv : std_logic_vector(9 downto 0);
    signal index_reversed  : std_logic_vector(9 downto 0);

-- Strobe requesting a write to the output FIFO
    signal wr_fifo : std_logic;
	  
begin
    
    -----------------------------------------------------------------------------------------------
    -- Hier muss der Verilog FFT IP-Core instanziert werden
    -----------------------------------------------------------------------------------------------

    -- FFT IP core: consumes the fixed-point input sample while
    -- fft_input_data_enable is high and flags results with fft_output_valid
    u_fft : fftmain
        port map (
            clock => clk,
            reset => reset,
            -- input side
            di_en => fft_input_data_enable,
            di_re => data_in_re,
            di_im => data_in_im,
            -- output side
            do_en => fft_output_valid,
            do_re => data_out_re,
            do_im => data_out_im
        );
    
  --  fft_output_valid <= '0';
   -- data_out_re <= (others => '0');
   -- data_out_im <= (others => '0');

    -----------------------------------------------------------------------------------------------
    -- Hier muss der VHDL Magnitue IP-COre instanziert werden
    -----------------------------------------------------------------------------------------------
			 
     -- Magnitude IP core: takes the real/imaginary FFT outputs in fixed point
     -- and delivers the magnitude in fixed point together with a valid flag
     u_fft_mag_calc : entity work.fft_magnitude_calc
         port map (
             clk              => clk,
             reset            => reset,
             -- input side, driven directly by the FFT core
             input_valid      => fft_output_valid,
             input_re         => data_out_re,
             input_im         => data_out_im,
             -- output side
             output_valid     => fft_mag_calc_valid,
             output_magnitude => fft_mag_calc_result
         );

 --   fft_mag_calc_valid <= '1' when index = 0 else '0';
 --  fft_mag_calc_result <= (others => '0');

    -----------------------------------------------------------------------------------------------
    -- Zustandsmaschine fuer die Taskabarbeitung (Uebergangsschaltnetz)
    -----------------------------------------------------------------------------------------------
    -- Next-state logic of the task state machine.
    --   TASK_IDLE / TASK_DONE : wait for task_start, then go to TASK_RUNNING
    --   TASK_RUNNING          : finished as soon as the phase counter index is 2
    task_state_transitions : process (all) is
    begin
        case current_task_state is
            when work.task.TASK_RUNNING =>
                if index = 2 then
                    next_task_state <= work.task.TASK_DONE;
                else
                    next_task_state <= work.task.TASK_RUNNING;
                end if;

            when work.task.TASK_IDLE | work.task.TASK_DONE =>
                if task_start = '1' then
                    next_task_state <= work.task.TASK_RUNNING;
                else
                    -- no start request: stay where we are
                    next_task_state <= current_task_state;
                end if;
        end case;
    end process task_state_transitions;

    -----------------------------------------------------------------------------------------------
    -- Zustandsmaschine fuer die eigentliche Ablaufsteuerung fuer die FFT (Uebergangsschaltnetz)

    --  - FFT_IDLE : Warten auf TASK_RUNNING
    --  - FFT_READ : STREAM_LEN Samples aus dem Eingangs-FIFO lesen und in FFT schieben
    --  - FFT_WAIT : Warten, bis alle FFT-/Magnitude-Ergebnisse im Speicher sind (index wird 1)
    --  - FFT_WRITE: Schreiben in Ausgangs-FIFO (gesteuert über vorhandene wr_fifo/index-Logik)
    -----------------------------------------------------------------------------------------------

    -- Hier soll Ihre Ablaufsteuerung fuer die FFT stehen

    -- Next-state and output logic of the FFT sequencing state machine.
    -- signal_read and fft_input_data_enable are high only in FFT_READ.
    fft_next_state_machine : process (all) is
        -- '1' while samples are read and pushed into the FFT core
        variable feeding : std_logic;
    begin
        feeding        := '0';
        fft_next_state <= fft_state;  -- hold the state unless a branch says otherwise

        case fft_state is
            when FFT_IDLE =>
                -- wait until the task has been started
                if current_task_state = work.task.TASK_RUNNING then
                    fft_next_state <= FFT_READ;
                end if;

            when FFT_READ =>
                -- read the inputs and shift them into the FFT core
                feeding := '1';
                if sample_cnt = work.task.STREAM_LEN - 1 then
                    fft_next_state <= FFT_WAIT;
                end if;

            when FFT_WAIT =>
                -- wait until the phase counter signals that all magnitudes are stored
                if index = 1 then
                    fft_next_state <= FFT_WRITE;
                end if;

            when FFT_WRITE =>
                -- leave as soon as the task state machine is about to finish
                if next_task_state = work.task.TASK_DONE then
                    fft_next_state <= FFT_IDLE;
                end if;
        end case;

        signal_read           <= feeding;
        fft_input_data_enable <= feeding;
    end process fft_next_state_machine;

	-- State register of the FFT state machine plus the input sample counter.
	-- sample_cnt is cleared in FFT_IDLE, advances once per clock in FFT_READ
	-- and holds its value in every other state.
	fft_state_register : process (clk, reset) is
	begin
	    if reset = '1' then
	        sample_cnt <= 0;
	        fft_state  <= FFT_IDLE;
	    elsif rising_edge(clk) then
	        fft_state <= fft_next_state;

	        case fft_state is
	            when FFT_IDLE => sample_cnt <= 0;
	            when FFT_READ => sample_cnt <= sample_cnt + 1;
	            when others   => null;
	        end case;
	    end if;
	end process fft_state_register;

    -----------------------------------------------------------------------------------------------
    -- Ausgangsschaltnetz/Zustandsspeicher fuer die Task und FFT Zustandsmaschine
    -----------------------------------------------------------------------------------------------
   -- State register of the task state machine, phase counter (index) and
   -- write strobe (wr_fifo).
   --
   -- While the task is running, index advances each time index_output reaches
   -- STREAM_LEN - 1:
   --   index = 0 : magnitude results are being stored in data_memory
   --   index = 1 : data_memory is streamed to the output (wr_fifo high)
   --   index = 2 : the task state machine moves to TASK_DONE
   sync : process ( clk, reset ) is
    begin
        if ( reset = '1' ) then
            current_task_state <= work.task.TASK_IDLE;
            index <= 0;
            wr_fifo <= '0';
        elsif ( rising_edge( clk ) ) then
            current_task_state <= next_task_state;
            wr_fifo <= '0';
            case next_task_state is
                when work.task.TASK_IDLE =>
                    index <= 0;

                when work.task.TASK_RUNNING =>
                    if ( index_output = work.task.STREAM_LEN - 1 ) then
                        -- Last element of the current phase. The strobe is
                        -- deliberately NOT raised again here: doing so kept
                        -- wr_fifo high for STREAM_LEN + 1 cycles and produced
                        -- one surplus output write.
                        index <= index + 1;
                    elsif ( index = 1 ) then
                        wr_fifo <= '1';
                    end if;

                when work.task.TASK_DONE =>
                    -- Re-arm the phase counter. If index stayed at 2, a task
                    -- restarted from TASK_DONE would be reported as done
                    -- again after a single cycle.
                    index <= 0;
            end case;
        end if;
    end process sync;
	 

    -----------------------------------------------------------------------------------------------
    --
    -- Skalierung der Eingangswerte welche vom FIFO gelesen werden
    -- Dies soll außerhalb eines Prozesses geschehen damit die gelesenen Werte direkt skaliert werden
    -- und im naechsten Takt schon weiter verarbeitet werden können
    --
    -- Erforderliches Scaling:
    --
    -- By selecting the amplitude as a power of two (e.g. 2 ** 2) the
    -- multiplication is a simple addition of the exponents.
    -- In the following calculation the inputs are scaled from FP in range +-16 to FP in range +-1
    -- This means an divsion through 16 -> exponent needs an addition of - 4
    --
    -- fft_float_input = gelesener Wert vom FIFO (floating point)
    -- fft_float_scaled_input = soll skalierter Wert vom FIFO seien (floating point)
    -- (Anm. Der FFT IP-Core braucht als Format Fix-Point -> noch eine weitere Wandlung erforderlich)
    -----------------------------------------------------------------------------------------------

    -- Raw word read from the input, interpreted as a floating-point bit pattern
    c_float_input:
        fft_float_input <= signed(signal_readdata);

    -- Divide by 16 (exponent - 4)
    c_float_scaled_input:
        fft_float_scaled_input <= scale_exponent(arg => fft_float_input, delta => -4);

    -- Real part for the FFT core: scaled value converted to fixed point
    c_fft_input_re:
        data_in_re <= to_fixed(std_logic_vector(fft_float_scaled_input));

    -- Imaginary part is always zero
    c_fft_input_im:
        data_in_im <= (data_in_im'range => '0');
				
    -----------------------------------------------------------------------------------------------
    --
    -- Scaling of the magnitude values delivered by the magnitude IP core (fft_magnitude_calc)
    -- This is done outside of a process so that the converted value is scaled directly
    -- and can be stored in the next clock cycle
    --
    -- Required scaling:
    --
    -- By selecting the amplitude as a power of two (e.g. 2 ** 2) the
    -- multiplication is a simple addition of the exponents.
    -- In the following calculation the results are scaled from FP in range +-1 back to FP in range +-16
    -- The first frequency bin (DC bin) needs an additional multiplication by two compared to the other frequency bins (the FFT IP core used divides the result of the first frequency bin by N instead of the correct N/2)
    -- Nominally this means a multiplication by 16 for all other frequency bins -> exponent needs an addition of +4
    -- and a multiplication by 32 for the first frequency bin (DC part) -> exponent needs an addition of +5
    -- NOTE: scale_value() currently returns +5 / +6 (x32 / x64) instead; see the comments inside that function
    --
    -- data_out_mag_signed_float = result of the magnitude calculation converted to float
    -- fft_float_scaled = the scaled float value of the magnitude
    -----------------------------------------------------------------------------------------------

	-- Fixed-point magnitude converted to a floating-point bit pattern
	c_mag_to_float:
	    data_out_mag_signed_float <= signed(to_float(fft_mag_calc_result));

	-- Rescaled magnitude. The exponent offset depends on the running output
	-- index (see scale_value). The result is forced to zero while the
	-- magnitude core flags its output as not valid.
	c_mag_rescale:
	    fft_float_scaled <=
	        scale_exponent(data_out_mag_signed_float, scale_value(index_output))
	            when fft_mag_calc_valid /= '0'
	        else (others => '0');

						-- Der Ausgang muss noch entsprechend skaliert werden


    -----------------------------------------------------------------------------------------------
    -- Der FFT-IP Core liefert das Ergebnis nicht in der natuerlichen Reihenfolge deswegen muss eine
	-- Umordnung der Ausgangswerte erfolgen
	-- 
	-- index_output_sv = std_logic_vector des Integer Ausgangsindex
	-- index_reversed = der reversed Ausgangsindex (wird benoetigt fuer damit man die FFT Ergebnisse in die natuerliche Ordnung bringt
	--
    -- Output index as a bit vector
    c_index_output_sv:
        index_output_sv <= std_logic_vector(to_unsigned(index_output, index_output_sv'length));

    -- Bit-reversed output index: bit i of index_output_sv drives the mirrored
    -- bit position of index_reversed (bit 0 -> bit 9, ..., bit 9 -> bit 0)
    c_reversed_index:
        for bit_pos in index_output_sv'range generate
            index_reversed(index_reversed'high - bit_pos) <= index_output_sv(bit_pos);
        end generate c_reversed_index;
	
	   
    -----------------------------------------------------------------------------------------------
    -- Prozess steuert das hochzaehlen des Ausgang Index
    -----------------------------------------------------------------------------------------------
	-- Running output index. It is used both as write position while the
	-- magnitude results arrive (via its bit-reversed form) and as read
	-- position while data_memory is streamed out.
	--
	-- The index is cleared
	--   * one cycle after it reached 1023 (unconditionally, so that the value
	--     1023 is visible for exactly one clock cycle), and
	--   * whenever the FFT state machine is in FFT_IDLE, so that every run
	--     starts at index 0 even if the previous run left a stale value.
	-- Otherwise it advances by one for every valid magnitude result and for
	-- every wr_fifo strobe.
	p_number_output_sample: process ( clk, reset ) is
    begin
        if ( reset = '1' ) then
            index_output <= 0;
        elsif ( rising_edge( clk ) ) then
            if index_output = 1023 or fft_state = FFT_IDLE then
                index_output <= 0;
            elsif fft_mag_calc_valid = '1' or wr_fifo = '1' then
                index_output <= index_output + 1;
            end if;
        end if;
    end process p_number_output_sample;
	
    -----------------------------------------------------------------------------------------------
    -- Prozess speichert das skalierte Endergbenis iun der natural order
    -----------------------------------------------------------------------------------------------
    -- Stores every valid, rescaled magnitude at the bit-reversed position of
    -- the running output index. Nothing is written while reset is asserted;
    -- the array itself is never cleared.
    p_output2float_memory : process ( clk ) is
        variable natural_addr : integer range 0 to 1023;
    begin
        if rising_edge( clk ) then
            if reset /= '1' and fft_mag_calc_valid = '1' then
                natural_addr := to_integer(unsigned(index_reversed));
                data_memory(natural_addr) <= std_logic_vector(fft_float_scaled);
            end if;
        end if;
    end process p_output2float_memory;
	
    -----------------------------------------------------------------------------------------------
    -- Schreiben der berechneten Werte in den FIFO
    -----------------------------------------------------------------------------------------------
   -- Registered output stage: for every cycle in which wr_fifo is high, the
   -- word at data_memory(index_output) is presented on signal_writedata and
   -- signal_write is high in the following cycle. signal_writedata keeps its
   -- last value while no write is requested.
   p_output_fifo : process ( clk, reset ) is
    begin
        if reset = '1' then
            signal_write     <= '0';
            signal_writedata <= (others => '0');
        elsif rising_edge( clk ) then
            if wr_fifo = '1' then
                signal_write     <= '1';
                signal_writedata <= data_memory(index_output);
            else
                signal_write <= '0';
            end if;
        end if;
    end process p_output_fifo;
	
	
	-- Hier sollen die sonstigen benoetigten Anweisungen stehen
    -- Expose the registered task state on the entity port
    c_task_state_out:
        task_state <= current_task_state;

end architecture rtl;