LIBRARY ieee;
USE ieee.std_logic_1164.ALL;
USE ieee.std_logic_signed.ALL;
USE ieee.numeric_std.ALL;
USE work.FIVEn_DFT_PKG.ALL;

ENTITY WINOGRAD5 IS
  PORT(
    i_clk     : IN  std_logic;
    i_data_im : IN  vect_input_winograd5_5ndft;
    i_data_re : IN  vect_input_winograd5_5ndft;
    o_data_im : OUT vect_output_winograd5_5ndft;
    o_data_re : OUT vect_output_winograd5_5ndft
    );
END WINOGRAD5;

ARCHITECTURE Behavioral OF WINOGRAD5 IS

  SIGNAL mult_factors              : vect_mult_factors_32b          := ("10110000000000000000000000000000", "00100011110001101110111100110111", "00010111001111111101011000011110", "01100010011111000110001000101111", "00100101100111100100011000001001");
  -- multipliers multiplied by 2^30
  SIGNAL mult_factor_w_multipliers : vect_mult_factor_w_multipliers := (OTHERS => (OTHERS => '0'));

  --SIGNAL matrix_stages_signed_im             : matrix_winograd5_generic_signed_stages                              := (OTHERS => (OTHERS => (OTHERS => '0')));
  --SIGNAL matrix_stages_signed_re             : matrix_winograd5_generic_signed_stages                              := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL matrix_stages_im                    : matrix_winograd5_generic_stages                                     := (OTHERS => (OTHERS => (OTHERS => '0')));  --matrix(x)(y) = matrix(stage)(layer) form
  SIGNAL matrix_stages_re                    : matrix_winograd5_generic_stages                                     := (OTHERS => (OTHERS => (OTHERS => '0')));  -- matrix(x)(y) = matrix(stage)(layer) form
  SIGNAL multiply_by_2_power_cst_w_precision : std_logic_vector(cst_w_precision_winograd5_coeffs_5ndft-3 DOWNTO 0) := (OTHERS => '0');
  --SIGNAL in1, in2        :    smpl_out_winograd5_5ndft;
  --SIGNAL isigned         :    smpl_out_winograd5_signed_5ndft;





  FUNCTION add_sub (
    w_smpl   : IN natural;
    sub      : IN boolean;
    in1, in2 : IN smpl_out_winograd5_5ndft)
    RETURN smpl_out_winograd5_5ndft IS
    VARIABLE smpl_stages_out : smpl_out_winograd5_5ndft := (OTHERS => '0');
    VARIABLE isigned         : smpl_out_winograd5_signed_5ndft;
  BEGIN
    IF sub THEN
      isigned(w_smpl DOWNTO 0) := signed(in1(w_smpl-1)&in1(w_smpl-1 DOWNTO 0))-signed(in2(w_smpl-1)&in2(w_smpl-1 DOWNTO 0));
    ELSE
      isigned(w_smpl DOWNTO 0) := signed(in1(w_smpl-1)&in1(w_smpl-1 DOWNTO 0))+signed(in2(w_smpl-1)&in2(w_smpl-1 DOWNTO 0));
    END IF;

    smpl_stages_out(w_smpl DOWNTO 0) := std_logic_vector(unsigned(isigned(w_smpl DOWNTO 0)));
    RETURN smpl_stages_out;
  END FUNCTION;





  FUNCTION mult (
    w_smpl  : IN natural;
    cmplx   : IN boolean;
    w_coeff : IN natural;
    input   : IN smpl_out_winograd5_5ndft;
    coeff   : IN smpl_mult_factor_w_multipliers
    )
    RETURN std_logic_vector IS
    VARIABLE smpl_signed : smpl_out_winograd5_signed_5ndft := (OTHERS => '0');
    VARIABLE smpl_out    : smpl_out_winograd5_5ndft;
  BEGIN  -- FUNCTION mult
    IF(cmplx) THEN
      smpl_signed(w_smpl+w_coeff-1 DOWNTO 0) := signed(input(w_smpl-1 DOWNTO 0)) * (-signed(coeff));
    ELSE
      smpl_signed(w_smpl+w_coeff-1 DOWNTO 0) := signed(input(w_smpl-1 DOWNTO 0)) * signed(coeff);
    END IF;
    smpl_out := std_logic_vector(unsigned(smpl_signed));
    RETURN smpl_out;
  END FUNCTION mult;





  FUNCTION smpl_copy (
    w_smpl  : natural;
    smpl_in : smpl_out_winograd5_5ndft)
    RETURN std_logic_vector IS
    VARIABLE smpl_out : smpl_out_winograd5_5ndft := (OTHERS => '0');
  BEGIN  -- FUNCTION copy
    smpl_out(w_smpl DOWNTO 0) := smpl_in(w_smpl-1)&smpl_in(w_smpl-1 DOWNTO 0);
    RETURN smpl_out;  --smpl_in(cst_winograd5_w_out_5ndft-1 DOWNTO w_smpl+1)&smpl_in(w_smpl-1)&smpl_in(w_smpl-1 DOWNTO 0);--smpl_out;
  END FUNCTION smpl_copy;


BEGIN


  fill_input_output_matrix_for : FOR i IN 0 TO 4 GENERATE

    matrix_stages_im(0)(i)(cst_w_in_5ndft-1 DOWNTO 0) <= i_data_im(i)(cst_w_in_5ndft-1 DOWNTO 0);
    matrix_stages_re(0)(i)(cst_w_in_5ndft-1 DOWNTO 0) <= i_data_re(i)(cst_w_in_5ndft-1 DOWNTO 0);
    o_data_im(i)                                      <= matrix_stages_im(7)(i);
    o_data_re(i)                                      <= matrix_stages_re(7)(i);

  END GENERATE fill_input_output_matrix_for;



  -- purpose: Rounds the data in 32bits into cst_w_precision_winograd5_coeffs_5ndft bits (round for
  -- MSBs). Should run and cut before simulation and bitstream
  -- inputs : the coeffs to be cut mult_factors
  -- outputs: the cut coeffs mult_factor_w_multipliers
  mult_coeffs_cut : PROCESS(mult_factors)
  BEGIN
    FOR i IN 1 TO 5 LOOP
      mult_factor_w_multipliers(i) <= mult_factors(i)(31 DOWNTO 32-cst_w_precision_winograd5_coeffs_5ndft);
      IF(mult_factors(i)(31-cst_w_precision_winograd5_coeffs_5ndft) = '1') THEN
        mult_factor_w_multipliers(i) <= std_logic_vector(unsigned(signed(mult_factors(i)(31 DOWNTO 32-cst_w_precision_winograd5_coeffs_5ndft)) +1));
      END IF;
    END LOOP;  -- i
  END PROCESS mult_coeffs_cut;



  -- purpose: calculates each stage following the winograd5 schema
  -- inputs:  matrix_stages_im(0)
  -- outputs: matrix_stages_im(7)
  calculations_process : PROCESS(i_clk)
    VARIABLE w_smpl : natural;
  BEGIN

    IF(rising_edge(i_clk)) THEN

      -- stage 1
      -- w_smpl              :=== cst_w_in_5ndft+1;
      --data0
      matrix_stages_im(1)(0) <= smpl_copy(w_smpl => cst_w_in_5ndft, smpl_in => matrix_stages_im(0)(0));
      matrix_stages_re(1)(0) <= smpl_copy(w_smpl => cst_w_in_5ndft, smpl_in => matrix_stages_re(0)(0));

      -- data1
      matrix_stages_im(1)(1) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_im(0)(3), in2 => matrix_stages_im(0)(2));
      matrix_stages_re(1)(1) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_re(0)(3), in2 => matrix_stages_re(0)(2));

      --data2
      matrix_stages_im(1)(2) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_im(0)(4), in2 => matrix_stages_im(0)(1));
      matrix_stages_re(1)(2) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_re(0)(4), in2 => matrix_stages_re(0)(1));

      --data3
      matrix_stages_im(1)(3) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_im(0)(3), in2 => matrix_stages_im(0)(2));
      matrix_stages_re(1)(3) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_re(0)(3), in2 => matrix_stages_re(0)(2));

      --data4
      matrix_stages_im(1)(4) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_im(0)(1), in2 => matrix_stages_im(0)(4));
      matrix_stages_re(1)(4) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_re(0)(1), in2 => matrix_stages_re(0)(4));





      -- stage 2
      w_smpl                 := cst_w_in_5ndft+1;
      -- data0,3,4
      matrix_stages_im(2)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(0));
      matrix_stages_re(2)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(0));
      matrix_stages_im(2)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(3));
      matrix_stages_re(2)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(3));
      matrix_stages_im(2)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(4));
      matrix_stages_re(2)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(4));

      --data1
      matrix_stages_im(2)(1) <= add_sub (w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(1)(1), in2 => matrix_stages_im(1)(2));
      matrix_stages_re(2)(1) <= add_sub (w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(1)(1), in2 => matrix_stages_re(1)(2));

      --data2
      matrix_stages_re(2)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(1)(2), in2 => matrix_stages_re(1)(1));
      matrix_stages_im(2)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(1)(2), in2 => matrix_stages_im(1)(1));

      --data5
      matrix_stages_re(2)(5) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(1)(3), in2 => matrix_stages_re(1)(4));
      matrix_stages_im(2)(5) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(1)(3), in2 => matrix_stages_im(1)(4));





      -- stage 3
      w_smpl                 := cst_w_in_5ndft+2;
      --data1,2,3,4,5
      matrix_stages_im(3)(1) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(1));
      matrix_stages_re(3)(1) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(1));
      matrix_stages_im(3)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(2));
      matrix_stages_re(3)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(2));
      matrix_stages_im(3)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(3));
      matrix_stages_re(3)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(3));
      matrix_stages_im(3)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(4));
      matrix_stages_re(3)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(4));
      matrix_stages_im(3)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(5));
      matrix_stages_re(3)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(5));

      --data0
      matrix_stages_im(3)(0) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(2)(1), in2 => matrix_stages_im(2)(0));
      matrix_stages_re(3)(0) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(2)(1), in2 => matrix_stages_re(2)(0));





      -- stage 4, multiply
      w_smpl                                                                           := cst_w_in_5ndft+3;
      --data0, multiply by 1 = copy
      matrix_stages_im(4)(0)(w_smpl+cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0) <= matrix_stages_im(3)(0)(w_smpl-1)&matrix_stages_im(3)(0)(w_smpl-1)&matrix_stages_im(3)(0)(w_smpl-1 DOWNTO 0)&multiply_by_2_power_cst_w_precision;
      matrix_stages_re(4)(0)(w_smpl+cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0) <= matrix_stages_re(3)(0)(w_smpl-1)&matrix_stages_re(3)(0)(w_smpl-1)&matrix_stages_re(3)(0)(w_smpl-1 DOWNTO 0)&multiply_by_2_power_cst_w_precision;

      --data1
      matrix_stages_im(4)(1) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(1), coeff => mult_factor_w_multipliers(1));
      matrix_stages_re(4)(1) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(1), coeff => mult_factor_w_multipliers(1));

      --data2
      matrix_stages_im(4)(2) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(2), coeff => mult_factor_w_multipliers(2));
      matrix_stages_re(4)(2) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(2), coeff => mult_factor_w_multipliers(2));

      --data3
      matrix_stages_re(4)(3) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(3), coeff => mult_factor_w_multipliers(3));
      matrix_stages_im(4)(3) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(3), coeff => mult_factor_w_multipliers(3));

      --data4
      matrix_stages_re(4)(4) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(4), coeff => mult_factor_w_multipliers(4));
      matrix_stages_im(4)(4) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(4), coeff => mult_factor_w_multipliers(4));

      --data5
      matrix_stages_re(4)(5) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(5), coeff => mult_factor_w_multipliers(5));
      matrix_stages_im(4)(5) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(5), coeff => mult_factor_w_multipliers(5));





      -- stage 5
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft;
      --data0, 2, 3, 4, 5
      matrix_stages_im(5)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(0));
      matrix_stages_re(5)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(0));
      matrix_stages_im(5)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(2));
      matrix_stages_re(5)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(2));
      matrix_stages_im(5)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(3));
      matrix_stages_re(5)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(3));
      matrix_stages_im(5)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(4));
      matrix_stages_re(5)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(4));
      matrix_stages_im(5)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(5));
      matrix_stages_re(5)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(5));

      --data1
      matrix_stages_re(5)(1) <= add_sub(w_smpl => w_smpl, sub => false, IN1 => matrix_stages_re(4)(1), in2 => matrix_stages_re(4)(0));
      matrix_stages_im(5)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(4)(1), in2 => matrix_stages_im(4)(0));





      -- stage 6
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft+1;
      --data0
      matrix_stages_im(6)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(5)(0));
      matrix_stages_re(6)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(5)(0));

      --data1
      matrix_stages_re(6)(1) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(5)(1), in2 => matrix_stages_re(5)(2));
      matrix_stages_im(6)(1) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(5)(1), in2 => matrix_stages_im(5)(2));

      --data2
      matrix_stages_re(6)(2) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(5)(1), in2 => matrix_stages_re(5)(2));
      matrix_stages_im(6)(2) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(5)(1), in2 => matrix_stages_im(5)(2));

      --data3
      matrix_stages_re(6)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(5)(5), in2 => matrix_stages_re(5)(3));
      matrix_stages_im(6)(3) <= add_sub(w_smpl => w_smpl, sub => false, IN1 => matrix_stages_im(5)(5), in2 => matrix_stages_im(5)(3));

      --data4
      matrix_stages_re(6)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(5)(5), in2 => matrix_stages_re(5)(4));
      matrix_stages_im(6)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(5)(5), in2 => matrix_stages_im(5)(4));





      -- stage 7
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft+2;
      --data0
      matrix_stages_im(7)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(6)(0));
      matrix_stages_re(7)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(6)(0));

      --data1
      matrix_stages_re(7)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(6)(2), in2 => matrix_stages_re(6)(4));
      matrix_stages_im(7)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(6)(2), in2 => matrix_stages_im(6)(4));

      --data2
      matrix_stages_re(7)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(6)(1), in2 => matrix_stages_re(6)(3));
      matrix_stages_im(7)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(6)(1), in2 => matrix_stages_im(6)(3));

      --data3
      matrix_stages_re(7)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(6)(1), in2 => matrix_stages_re(6)(3));
      matrix_stages_im(7)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(6)(1), in2 => matrix_stages_im(6)(3));

      --data4
      matrix_stages_re(7)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(6)(2), in2 => matrix_stages_re(6)(4));
      matrix_stages_im(7)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(6)(2), in2 => matrix_stages_im(6)(4));

    END IF;
  END PROCESS calculations_process;


END Behavioral;