------------------------------------------------------------------------
-- fft_magnitude_calc
--
-- calculation of FFT magnitude sqrt(real_part²+im_part²)
-- Inputs:
-- input_re: signed fixed-point in the range +-1 (0.5 = 0x40000000, -0.5 = 0xC0000000; negative numbers in two's complement)
-- input_im: signed fixed-point in the range +-1 (0.5 = 0x40000000, -0.5 = 0xC0000000; negative numbers in two's complement)
-- input_valid: high = inputs are valid for data processing 
-- Outputs
-- output_magnitude: Fixpoint 0.5=0x40000000 (always positive)
-- output_valid:     high = magnitude data is valid
-----------------------------------------------------------------------

library ieee;
    use ieee.std_logic_1164.all;
    use ieee.numeric_std.all;

library work;
    use work.task.all;
	 use work.float.all;

entity fft_magnitude_calc is
    port (
        -- Clock and reset shared by all pipeline stages.
        clk              : in  std_logic;
        reset            : in  std_logic;

        -- Complex input sample; input_valid = '1' marks input_re/input_im
        -- as valid for processing. Both parts are 32-bit signed fixed-point
        -- words (see the file header for the number format).
        input_valid      : in  std_logic;
        input_re         : in  std_logic_vector(31 downto 0);
        input_im         : in  std_logic_vector(31 downto 0);

        -- Magnitude result; output_valid = '1' marks output_magnitude as valid.
        output_valid     : out std_logic;
        output_magnitude : out std_logic_vector(31 downto 0)
    );
end entity fft_magnitude_calc;

architecture rtl of fft_magnitude_calc is

    -- Word/array types for the bank of square-root units:
    -- 32-bit operands in, 16-bit results out.
    subtype Word_32 is std_logic_vector(31 downto 0);
    type Array_32 is array (natural range <>) of Word_32;

    subtype Word_16 is std_logic_vector(15 downto 0);
    type Array_16 is array (natural range <>) of Word_16;

    -- Stage 1: registered squares of the two input components.
    signal input_valid_stage1    : std_logic;
    signal re_multiply_re_stage1 : signed(63 downto 0);  -- input_re * input_re
    signal im_multiply_re_stage1 : signed(63 downto 0);  -- input_im * input_im (despite the "_re_" in the name)

    -- Stage 2: registered sum of the two squares.
    signal input_valid_stage2 : std_logic;
    signal re2_add_im2        : signed(63 downto 0);

    -- Stage 3: operands and results of the 17 square-root units.
    signal input_valid_stage3 : std_logic;
    signal input_sqrt         : Array_32(0 to 16);
    signal output_sqrt        : Array_16(0 to 16);

    -- Stage 4: 16-bit shift register that delays the valid flag.
    signal output_delay_sqrt : std_logic_vector(15 downto 0);

    -- Round-robin slot index into the square-root bank.
    signal index_sqrt : integer range 0 to 16;
	  
begin


    -- Stage 1: square the real and the imaginary input component.
    -- The valid flag is forwarded every clock; the two 64-bit products are
    -- only updated while input_valid is high and hold their value otherwise.
    p_pow2_stage1 : process (clk, reset) is
        variable re_operand : signed(31 downto 0);
        variable im_operand : signed(31 downto 0);
    begin
        if reset = '1' then
            re_multiply_re_stage1 <= (others => '0');
            im_multiply_re_stage1 <= (others => '0');
            input_valid_stage1    <= '0';
        elsif rising_edge(clk) then
            input_valid_stage1 <= input_valid;

            if input_valid = '1' then
                -- Interpret the raw input words as signed numbers once,
                -- then multiply each with itself (32 x 32 -> 64 bit).
                re_operand := signed(input_re);
                im_operand := signed(input_im);

                re_multiply_re_stage1 <= re_operand * re_operand;
                im_multiply_re_stage1 <= im_operand * im_operand;
            end if;
        end if;
    end process p_pow2_stage1;

	 -- Stage 2: real_part² + im_part².
    -- The sum register is updated on every clock (it is not gated by the
    -- valid flag); the valid flag is delayed by one clock alongside it.
    p_add_stage2 : process (clk, reset) is
    begin
        if reset = '1' then
            re2_add_im2        <= (others => '0');
            input_valid_stage2 <= '0';
        elsif rising_edge(clk) then
            re2_add_im2        <= re_multiply_re_stage1 + im_multiply_re_stage1;
            input_valid_stage2 <= input_valid_stage1;
        end if;
    end process p_add_stage2;
	 
	 -- Stage 3: distribute the sums round-robin over the 17 square-root units.
    -- Original design note: one sqrt calculation needs 16 clocks with
    -- G_DATA_W => 32, so 17 instances are used for a continuous stream.
    --
    -- The slot selected by index_sqrt is loaded with the upper 32 bits of
    -- re2_add_im2 on every clock; index_sqrt itself only advances (wrapping
    -- from 16 back to 0) while input_valid_stage2 is high.
    p_sqrt_stage3 : process (clk, reset) is
    begin
        if reset = '1' then
            input_valid_stage3 <= '0';
            index_sqrt         <= 0;
            input_sqrt         <= (others => (others => '0'));
        elsif rising_edge(clk) then
            input_valid_stage3 <= input_valid_stage2;

            -- Load the currently selected slot (uses the index value from
            -- before this clock edge).
            input_sqrt(index_sqrt) <= std_logic_vector(re2_add_im2(63 downto 32));

            -- Move on to the next slot for each valid sample.
            if input_valid_stage2 = '1' then
                if index_sqrt < 16 then
                    index_sqrt <= index_sqrt + 1;
                else
                    index_sqrt <= 0;
                end if;
            end if;
        end if;
    end process p_sqrt_stage3;
	
	 -- Bank of square-root units, one per slot of input_sqrt/output_sqrt,
    -- so that a continuous data stream can be processed.
    gen_sqrt_array : for slot in input_sqrt'range generate
        sqrt_module : entity work.squareRoot_pipe
            generic map (
                G_DATA_W => 32
            )
            port map (
                clk     => clk,
                rst     => reset,
                iv_data => input_sqrt(slot),
                ov_res  => output_sqrt(slot)
            );
    end generate gen_sqrt_array;
		  
    -- Stage 4: output assignment.
    --
    -- The valid flag of stage 3 is delayed through the 16-bit shift register
    -- output_delay_sqrt and then registered once more into output_valid.
    -- Together with that flag the result of one square-root unit is
    -- registered into the upper 16 bits of output_magnitude (lower 16 bits
    -- are zero).
    --
    -- The unit to read is selected by a read pointer local to this process.
    -- It starts at 0 and advances (wrapping 16 -> 0) once per delayed valid
    -- flag, i.e. it visits the slots in the same order in which stage 3
    -- fills them. Reading with the write pointer index_sqrt instead is only
    -- correct while input_valid never drops: index_sqrt stops advancing as
    -- soon as the valid flag goes low, while up to 16 results are still in
    -- flight, so those results would be taken from the wrong slot.
    -- For an uninterrupted stream both pointers have the same value at the
    -- moment of the read, so the behaviour is unchanged in that case.
    -- NOTE(review): like the original, this assumes that a squareRoot_pipe
    -- result is stable at ov_res by the time its delayed valid flag arrives
    -- here - confirm against the squareRoot_pipe implementation.
    p_output_stage4 : process (clk, reset) is
        variable read_index : integer range 0 to 16;
    begin
        if reset = '1' then
            output_valid      <= '0';
            output_magnitude  <= (others => '0');
            output_delay_sqrt <= (others => '0');
            read_index        := 0;
        elsif rising_edge(clk) then
            output_delay_sqrt <= output_delay_sqrt(14 downto 0) & input_valid_stage3;
            output_valid      <= output_delay_sqrt(15);
            output_magnitude  <= output_sqrt(read_index) & x"0000";

            -- A result has just been consumed: select the next slot for the
            -- following valid flag.
            if output_delay_sqrt(15) = '1' then
                if read_index = 16 then
                    read_index := 0;
                else
                    read_index := read_index + 1;
                end if;
            end if;
        end if;
    end process p_output_stage4;

	 
end architecture rtl;