Transférer les fichiers vers '5ndft'

5ndft update v0.2
3 years ago · d86f7016c6
--- a/5ndft/5k_DFT_pkg.vhd
+++ b/5ndft/5k_DFT_pkg.vhd
@@ -0,0 +1,73 @@
 LIBRARY ieee;
 USE ieee.std_logic_1164.ALL;
 USE ieee.numeric_std.ALL;
 USE ieee.std_logic_unsigned.ALL;
 USE ieee.std_logic_signed.ALL;
 USE work.PFB_PKG.ALL;

 -- NOTE:
 -- The number of input/output samples must be of the form 5 * 2^n and be < 160.

 PACKAGE FIVEn_DFT_PKG IS

  -- Shared constants and types for the 5*2^n point DFT (winograd5 cells
  -- followed by radix-2 butterfly stages). Most top-level parameters are
  -- aliases of constants declared in work.PFB_PKG.

  CONSTANT cst_w_in_5ndft  : natural := cst_w_polyfir_out_dft_in_pfb;  -- input bit width
  CONSTANT cst_w_out_5ndft : natural := cst_w_out_pfb;  -- output bit width, must be < 30

  CONSTANT cst_nb_samples_in_5ndft : natural := cst_nb_subfilters_pfb;  -- number of samples per transform, must be 5*n

  CONSTANT cst_w_precision_winograd5_coeffs_5ndft : natural := cst_w_precision_winograd5_coeffs_pfb;  -- winograd5 coefficient width: must be <= 32 and >= 3, sign bit included. Ideal is >= 8
  CONSTANT cst_w_precision_radix2_coeffs_5ndft    : natural := cst_w_precision_radix2_coeffs_pfb;  -- radix-2 twiddle width: must be <= 32 and >= 3, sign bit included. Ideal is >= 8

  -- CALCULATIONS --

  -- one winograd5 cell per group of 5 samples
  CONSTANT cst_nb_parallel_winograd5 : natural := cst_nb_samples_in_5ndft/5;

  CONSTANT cst_log2_nb_parallel_winograd5 : natural := cst_log2_nb_parallel_winograd_pfb;  -- = log2(cst_nb_parallel_winograd5); also the number of radix-2 stages
  CONSTANT cst_w_winograd_added           : natural := cst_w_precision_winograd5_coeffs_5ndft+6;  -- bit growth of a winograd5 cell; 6 is the number of addition stages in the winograd5 blk
  CONSTANT cst_w_radix2_added             : natural := cst_w_precision_radix2_coeffs_5ndft+2;  -- bit growth of one radix-2 stage
  CONSTANT cst_dft_w_out_5ndft            : natural := cst_w_in_5ndft+cst_w_winograd_added+(cst_log2_nb_parallel_winograd5*cst_w_radix2_added);  -- full-precision width after the last radix-2 stage
  CONSTANT cst_winograd5_w_out_5ndft      : natural := cst_w_in_5ndft+cst_w_winograd_added;  -- width of a winograd5 cell output
  CONSTANT cst_nb_wn_coeffs               : natural := cst_nb_samples_in_5ndft/2;  -- number of twiddle coefficients (half the transform size)

  -- TYPES --

  -- input/output samples; the underlying types are declared in work.PFB_PKG
  SUBTYPE smpl_in_5ndft IS smpl_real_imag_polyfir_out_dft_in_pfb;
  SUBTYPE smpl_out_5ndft IS smpl_real_imag_dft_out_pfb;  -- presumably std_logic_vector(cst_w_out_5ndft-1 DOWNTO 0); confirm in PFB_PKG

  TYPE vect_dft_input IS ARRAY (0 TO cst_nb_samples_in_5ndft-1) OF smpl_in_5ndft;
  SUBTYPE vect_dft_output IS vect_dft_output_pfb;  -- presumably ARRAY (0 TO cst_nb_samples_in_5ndft-1) OF smpl_out_5ndft; confirm in PFB_PKG
  -- winograd5
  SUBTYPE smpl_out_winograd5_5ndft IS std_logic_vector(cst_winograd5_w_out_5ndft-1 DOWNTO 0);
  SUBTYPE smpl_out_winograd5_signed_5ndft IS signed(cst_winograd5_w_out_5ndft-1 DOWNTO 0);

  -- the 5 inputs / 5 outputs of one winograd5 cell, and all cell outputs side by side
  TYPE vect_input_winograd5_5ndft IS ARRAY (0 TO 4) OF smpl_in_5ndft;
  TYPE vect_output_winograd5_5ndft IS ARRAY (0 TO 4) OF smpl_out_winograd5_5ndft;
  TYPE vect_total_output_winograd5_cells IS ARRAY (0 TO cst_nb_samples_in_5ndft-1) OF smpl_out_winograd5_5ndft;

  -- internal pipeline storage of a winograd5 cell: 8 stages of 6 values each
  TYPE vect_winograd5_generic_stage IS ARRAY (0 TO 5) OF smpl_out_winograd5_5ndft;
  TYPE matrix_winograd5_generic_stages IS ARRAY (0 TO 7) OF vect_winograd5_generic_stage;

  -- winograd5 multiplier coefficients: 32-bit reference values and their
  -- cst_w_precision_winograd5_coeffs_5ndft-bit versions
  SUBTYPE smpl_mult_factor_w_multipliers IS std_logic_vector(cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0);
  SUBTYPE smpl_mult_factor_w_multipliers_signed IS signed(cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0);
  TYPE vect_mult_factors_32b IS ARRAY (1 TO 5) OF std_logic_vector(31 DOWNTO 0);
  TYPE vect_mult_factor_w_multipliers IS ARRAY (1 TO 5) OF smpl_mult_factor_w_multipliers;

  -- radix2
  -- twiddle coefficients: 32-bit reference values and their
  -- cst_w_precision_radix2_coeffs_5ndft-bit ("wb") versions
  SUBTYPE smpl_cos_sin_wb IS std_logic_vector(cst_w_precision_radix2_coeffs_5ndft-1 DOWNTO 0);
  SUBTYPE smpl_cos_sin_signed_wb IS signed(cst_w_precision_radix2_coeffs_5ndft-1 DOWNTO 0);
  TYPE vect_cos_sin_k_pi_over_5_32b IS ARRAY(0 TO 4) OF std_logic_vector(31 DOWNTO 0);
  TYPE vect_cos_sin_k_pi_over_5_wb IS ARRAY(0 TO 4) OF smpl_cos_sin_wb;
  TYPE vect_cos_sin_k_pi_over_80_32b IS ARRAY (0 TO cst_nb_wn_coeffs-1) OF std_logic_vector(31 DOWNTO 0);

  -- radix-2 data samples are always carried at the full final width
  SUBTYPE smpl_out_radix2 IS std_logic_vector(cst_dft_w_out_5ndft-1 DOWNTO 0);
  SUBTYPE smpl_out_signed_radix2 IS signed(cst_dft_w_out_5ndft-1 DOWNTO 0);
  TYPE vect_radix2_fft_line IS ARRAY (0 TO cst_log2_nb_parallel_winograd5) OF smpl_out_radix2;  -- one sample position across all stage boundaries
  TYPE vect_radix2_line IS ARRAY (0 TO 3) OF smpl_out_radix2;  -- 4 pipeline points of one butterfly branch
  TYPE matrix_radix2_cell IS ARRAY (0 TO 1) OF vect_radix2_line;  -- the 2 branches of one butterfly

  -- whole fft
  TYPE matrix_fft_stages IS ARRAY (0 TO cst_nb_samples_in_5ndft-1) OF vect_radix2_fft_line;  -- inputs and outputs of radix2 cells

 END;


--- a/5ndft/FFT_tree.vhd
+++ b/5ndft/FFT_tree.vhd
@@ -0,0 +1,263 @@
 LIBRARY ieee;
 USE ieee.std_logic_1164.ALL;
 USE ieee.std_logic_signed.ALL;
 USE ieee.numeric_std.ALL;
 USE work.FIVEn_DFT_PKG.ALL;
 USE work.coeff_5ndft.ALL;


 -- Top level of the 5*2^n point DFT: takes cst_nb_samples_in_5ndft complex
 -- samples in parallel (separate real and imaginary vectors) and delivers the
 -- same number of complex output samples.
 ENTITY FFT_tree IS
  GENERIC(
    -- number of most significant bits of the full-precision result that are
    -- skipped when the cst_w_out_5ndft output bits are selected
    nb_bits_shift_round : IN natural);
  PORT(
    i_clk     : IN  std_logic;          -- clock of the winograd5 and radix-2 cells
    i_data_re : IN  vect_dft_input;     -- real parts of the input samples
    i_data_im : IN  vect_dft_input;     -- imaginary parts of the input samples
    o_data_re : OUT vect_dft_output := (OTHERS => (OTHERS => '0'));  -- real parts of the results
    o_data_im : OUT vect_dft_output := (OTHERS => (OTHERS => '0'))   -- imaginary parts of the results
    );
 END FFT_tree;

 -- Structural description of the 5*2^n point DFT:
 --   1. the inputs are distributed over cst_nb_parallel_winograd5 winograd5
 --      cells in even/odd (bit-reversed) cell order,
 --   2. a first radix-2 stage combines pairs of winograd5 cells (blocks of 10),
 --   3. the remaining radix-2 stages double the block size up to the full
 --      transform,
 --   4. cst_w_out_5ndft bits of the last stage are selected as outputs.
 -- matrix_inputs_outputs_radix2_cells_xx(sample)(s) holds the data at the
 -- boundary number s: s = 0 is the winograd5 output, s =
 -- cst_log2_nb_parallel_winograd5 is the final result.
 ARCHITECTURE instanciating_cells OF FFT_tree IS

  -- cos / sin tables for the first radix-2 stage (index k = 0..4), 32-bit
  -- two's complement, coeffs multiplied by 2^30
  SIGNAL cos_k_pi_over_5 : vect_cos_sin_k_pi_over_5_32b := ("01000000000000000000000000000000", "00110011110001101110111100110111", "00010011110001101110111100110111", "11101100001110010001000011001001", "11001100001110010001000011001001");  -- coeffs multiplied by 2^30
  SIGNAL sin_k_pi_over_5 : vect_cos_sin_k_pi_over_5_32b := ("00000000000000000000000000000000", "11011010011000011011100111110111", "11000011001000011110001111011001", "11000011001000011110001111011001", "11011010011000011011100111110111");  -- coeffs multiplied by 2^30

  -- purpose: give the proper arrangement for the winograd5 blks
  -- the right order: even numbers then odd numbers. At each stage the first
  -- half of the table is the previous table multiplied by 2 (even numbers)
  -- and the second half is the previous table multiplied by 2, plus 1 (odd
  -- numbers).
  -- input: i, the natural (unsorted) winograd5 instance number
  -- output: the winograd5 instance number at that place once sorted; 0 when
  -- there is a single winograd5 cell
  FUNCTION rearrange (
    i : IN natural)
    RETURN natural IS
    TYPE vect_rearrange IS ARRAY (0 TO cst_nb_samples_in_5ndft) OF natural;
    TYPE matrix_rearrange IS ARRAY (0 TO cst_log2_nb_parallel_winograd5) OF vect_rearrange;
    VARIABLE matrix_affect_rearrange : matrix_rearrange := (OTHERS => (OTHERS => 0));
  BEGIN  -- FUNCTION rearrange
    IF cst_log2_nb_parallel_winograd5 > 0 THEN
      -- stage 1 table is (0, 1); entry 0 is already 0 from the initialisation
      matrix_affect_rearrange(1)(1) := 1;
      FOR stage IN 2 TO cst_log2_nb_parallel_winograd5 LOOP
        -- k is deliberately not called i, so that it does not hide the parameter
        FOR k IN 0 TO 2**(stage-1)-1 LOOP
          matrix_affect_rearrange(stage)(k)              := matrix_affect_rearrange(stage-1)(k)*2;
          matrix_affect_rearrange(stage)(k+2**(stage-1)) := matrix_affect_rearrange(stage-1)(k)*2+1;
        END LOOP;  -- k
      END LOOP;  -- stage
      RETURN matrix_affect_rearrange(cst_log2_nb_parallel_winograd5)(i);
    ELSE
      RETURN 0;
    END IF;
  END FUNCTION rearrange;

  -- per-cell views (5 samples each) of the winograd5 inputs and outputs
  TYPE matrix_input_winograd5_5ndft IS ARRAY (0 TO cst_nb_parallel_winograd5-1) OF vect_input_winograd5_5ndft;
  TYPE matrix_output_winograd5_5ndft IS ARRAY (0 TO cst_nb_parallel_winograd5-1) OF vect_output_winograd5_5ndft;

  -- first-stage twiddles reduced to cst_w_precision_radix2_coeffs_5ndft bits
  SIGNAL cos_k_pi_over_5_wb : vect_cos_sin_k_pi_over_5_wb := (OTHERS => (OTHERS => '0'));
  SIGNAL sin_k_pi_over_5_wb : vect_cos_sin_k_pi_over_5_wb := (OTHERS => (OTHERS => '0'));

  SIGNAL data_out_winograd5_re        : vect_total_output_winograd5_cells := (OTHERS => (OTHERS => '0'));
  SIGNAL data_out_winograd5_im        : vect_total_output_winograd5_cells := (OTHERS => (OTHERS => '0'));
  SIGNAL winograd_rearranged_input_re : vect_dft_input                    := (OTHERS => (OTHERS => '0'));
  SIGNAL winograd_rearranged_input_im : vect_dft_input                    := (OTHERS => (OTHERS => '0'));

  SIGNAL matrix_inputs_outputs_radix2_cells_re : matrix_fft_stages := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL matrix_inputs_outputs_radix2_cells_im : matrix_fft_stages := (OTHERS => (OTHERS => (OTHERS => '0')));

  SIGNAL vect_input_1_cell_winograd5_re  : matrix_input_winograd5_5ndft  := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_input_1_cell_winograd5_im  : matrix_input_winograd5_5ndft  := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_output_1_cell_winograd5_re : matrix_output_winograd5_5ndft := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_output_1_cell_winograd5_im : matrix_output_winograd5_5ndft := (OTHERS => (OTHERS => (OTHERS => '0')));
 BEGIN  -- ARCHITECTURE instanciating_cells



  -- coeffs cut

  -- purpose: reduces the 32-bit coefficients to their
  -- cst_w_precision_radix2_coeffs_5ndft most significant bits, adding 1 when
  -- the first dropped bit is '1' (round to nearest). The tables never change,
  -- so this only evaluates at start-up.
  -- inputs : sin_k_pi_over_5, cos_k_pi_over_5
  -- outputs: cos_k_pi_over_5_wb, sin_k_pi_over_5_wb
  -- NOTE(review): for a coefficient width of 32 the index of the first dropped
  -- bit below evaluates to -1; confirm that this width is never configured.
  mult_coeffs_cut : PROCESS(sin_k_pi_over_5, cos_k_pi_over_5)
  BEGIN
    FOR i IN 0 TO 4 LOOP
      -- default: plain truncation, overridden below when rounding up
      cos_k_pi_over_5_wb(i) <= cos_k_pi_over_5(i)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft);
      sin_k_pi_over_5_wb(i) <= sin_k_pi_over_5(i)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft);
      IF(cos_k_pi_over_5(i)(32-cst_w_precision_radix2_coeffs_5ndft-1) = '1') THEN
        cos_k_pi_over_5_wb(i) <= std_logic_vector(unsigned(signed(cos_k_pi_over_5(i)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft))+1));
      END IF;
      IF(sin_k_pi_over_5(i)(32-cst_w_precision_radix2_coeffs_5ndft-1) = '1') THEN
        sin_k_pi_over_5_wb(i) <= std_logic_vector(unsigned(signed(sin_k_pi_over_5(i)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft))+1));
      END IF;
    END LOOP;  -- i

  END PROCESS mult_coeffs_cut;



  -- inputs rearranging

  -- purpose: routes every input sample to its winograd5 cell. Cell number i
  -- (natural order) receives the samples i, i+cst_nb_parallel_winograd5, ...
  -- and is placed at position rearrange(i). Rearranging inputs order is
  -- equivalent to rearranging winograd5 order.
  -- inputs: i_data_re, i_data_im
  -- outputs: winograd_rearranged_input_re, winograd_rearranged_input_im
  rearrange_winograd_input : FOR i IN 0 TO cst_nb_parallel_winograd5-1 GENERATE
    five_inputs : FOR j IN 0 TO 4 GENERATE
      winograd_rearranged_input_re(rearrange(i => i)*5+j) <= i_data_re(j*cst_nb_parallel_winograd5+i);
      winograd_rearranged_input_im(rearrange(i => i)*5+j) <= i_data_im(j*cst_nb_parallel_winograd5+i);
    END GENERATE five_inputs;
  END GENERATE rearrange_winograd_input;



  -- winograd instanciating

  -- purpose: wiring; instanciates the parallel winograd stage with their correct inputs and outputs
  -- inputs: winograd_rearranged_input_im, winograd_rearranged_input_re (length
  -- cst_nb_samples_in_5ndft each, cut into subvectors of 5 samples)
  -- instances: WINOGRAD5(Behavioral)
  -- outputs: data_out_winograd5_im, data_out_winograd5_re (the per-cell
  -- outputs vect_output_1_cell_winograd5_xx flattened back into one vector)
  winograd5_instances : FOR i IN 0 TO cst_nb_parallel_winograd5-1 GENERATE
    fill_for : FOR j IN 0 TO 4 GENERATE
      vect_input_1_cell_winograd5_im(i)(j) <= winograd_rearranged_input_im(j+5*i);
      vect_input_1_cell_winograd5_re(i)(j) <= winograd_rearranged_input_re(j+5*i);
      data_out_winograd5_im(j+5*i)         <= vect_output_1_cell_winograd5_im(i)(j);
      data_out_winograd5_re(j+5*i)         <= vect_output_1_cell_winograd5_re(i)(j);
    END GENERATE fill_for;
    winograd5_inst : ENTITY work.WINOGRAD5(Behavioral)
      PORT MAP(
        i_clk     => i_clk,
        i_data_im => vect_input_1_cell_winograd5_im(i),
        i_data_re => vect_input_1_cell_winograd5_re(i),
        o_data_im => vect_output_1_cell_winograd5_im(i),
        o_data_re => vect_output_1_cell_winograd5_re(i)
        );
  END GENERATE winograd5_instances;



  -- winograd results

  -- purpose: fill boundary 0 (input of the first radix-2 stage) of the
  -- butterfly matrix. Only the cst_winograd5_w_out_5ndft low bits are driven;
  -- the upper bits keep their '0' initial value.
  -- inputs: data_out_winograd5_re, data_out_winograd5_im
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(0), matrix_inputs_outputs_radix2_cells_im(x)(0)
  fill_radix2_cells_input_matrix : FOR i IN 0 TO cst_nb_samples_in_5ndft-1 GENERATE
    matrix_inputs_outputs_radix2_cells_re(i)(0)(cst_winograd5_w_out_5ndft-1 DOWNTO 0) <= data_out_winograd5_re(i);
    matrix_inputs_outputs_radix2_cells_im(i)(0)(cst_winograd5_w_out_5ndft-1 DOWNTO 0) <= data_out_winograd5_im(i);
  END GENERATE fill_radix2_cells_input_matrix;



  -- purpose: instanciates the first butterfly stage (directly after the
  -- winograd results): within each block of 10 samples, sample i is combined
  -- with sample i+5. If there is only 1 parallel winograd5 cell the range
  -- 0 TO -1 is empty and nothing is generated.
  -- inputs: matrix_inputs_outputs_radix2_cells_re(x)(0), matrix_inputs_outputs_radix2_cells_im(x)(0)
  -- instances: radix_2_cell_winograd(radix2)
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(1), matrix_inputs_outputs_radix2_cells_im(x)(1)
  radix2_cells_stage1 : FOR couple_winograd5_nb IN 0 TO cst_nb_parallel_winograd5/2-1 GENERATE
    radix2_winograd_out : FOR i IN 0 TO 4 GENERATE
      radix_2_out_winograd_inst : ENTITY work.radix_2_cell_winograd(radix2)
        GENERIC MAP(
          w_in => cst_winograd5_w_out_5ndft
          )
        PORT MAP(
          i_clk      => i_clk,
          i_cos      => cos_k_pi_over_5_wb(i MOD 5),
          i_sin      => sin_k_pi_over_5_wb(i MOD 5),
          i_data1_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i)(0),
          i_data1_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i)(0),
          i_data2_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i+5)(0),
          i_data2_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i+5)(0),
          o_data1_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i)(1),
          o_data1_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i)(1),
          o_data2_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i+5)(1),
          o_data2_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i+5)(1)
          );
    END GENERATE radix2_winograd_out;
  END GENERATE radix2_cells_stage1;



  -- radix2 cells instanciation

  -- purpose: instanciates all the butterfly stages except the first one. The
  -- generate index "stage" counts backwards: stage = 1 is the LAST butterfly
  -- stage (one block covering the whole transform) and it reads boundary
  -- cst_log2_nb_parallel_winograd5-stage of the matrix.
  -- For a given stage:
  --   half block size = cst_nb_samples_in_5ndft/(2**stage)
  --   block size      = cst_nb_samples_in_5ndft/(2**(stage-1))
  --   number of blocks = 2**(stage-1)
  -- Inside block number cell_10_nb, sample cell_nb (lower half) is combined
  -- with sample cell_nb + half block size (upper half), in place, exactly as
  -- the first stage does inside its blocks of 10. The lower halves therefore
  -- start every "block size" samples; starting them every "half block size"
  -- samples would make consecutive blocks overlap (two drivers on the same
  -- samples) and leave the upper part of the vector unprocessed.
  -- The twiddle index is taken MOD cst_nb_wn_coeffs so that it only depends
  -- on the position inside the block.
  -- cos_k_pi_over_n_wb / sin_k_pi_over_n_wb are not declared in this file;
  -- presumably they come from work.coeff_5ndft.
  -- inputs: matrix_inputs_outputs_radix2_cells_re(x)(1), matrix_inputs_outputs_radix2_cells_im(x)(1)
  -- instances: radix_2_cell_winograd(radix2)
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(cst_log2_nb_parallel_winograd5), matrix_inputs_outputs_radix2_cells_im(x)(cst_log2_nb_parallel_winograd5)
  stages_generation : FOR stage IN 1 TO cst_log2_nb_parallel_winograd5-1 GENERATE
    k10_blocks : FOR cell_10_nb IN 1 TO 2**(stage-1) GENERATE
      parallel_cells : FOR cell_nb IN (cst_nb_samples_in_5ndft/(2**(stage-1)))*(cell_10_nb-1) TO (cst_nb_samples_in_5ndft/(2**(stage-1)))*(cell_10_nb-1)+(cst_nb_samples_in_5ndft/(2**stage))-1 GENERATE

        radix_2_generic_inst : ENTITY work.radix_2_cell_winograd(radix2)
          GENERIC MAP(
            -- significant width at the input boundary of this stage
            w_in => cst_winograd5_w_out_5ndft+(cst_log2_nb_parallel_winograd5-stage)*cst_w_radix2_added
            )
          PORT MAP(
            i_clk      => i_clk,
            i_cos      => cos_k_pi_over_n_wb((cst_nb_wn_coeffs/10/(2**(cst_log2_nb_parallel_winograd5-stage-1))*cell_nb) MOD cst_nb_wn_coeffs),
            i_sin      => sin_k_pi_over_n_wb((cst_nb_wn_coeffs/10/(2**(cst_log2_nb_parallel_winograd5-stage-1))*cell_nb) MOD cst_nb_wn_coeffs),
            i_data1_re => matrix_inputs_outputs_radix2_cells_re(cell_nb)(cst_log2_nb_parallel_winograd5-stage),
            i_data1_im => matrix_inputs_outputs_radix2_cells_im(cell_nb)(cst_log2_nb_parallel_winograd5-stage),
            i_data2_re => matrix_inputs_outputs_radix2_cells_re(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage),
            i_data2_im => matrix_inputs_outputs_radix2_cells_im(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage),
            o_data1_re => matrix_inputs_outputs_radix2_cells_re(cell_nb)(cst_log2_nb_parallel_winograd5-stage+1),
            o_data1_im => matrix_inputs_outputs_radix2_cells_im(cell_nb)(cst_log2_nb_parallel_winograd5-stage+1),
            o_data2_re => matrix_inputs_outputs_radix2_cells_re(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage+1),
            o_data2_im => matrix_inputs_outputs_radix2_cells_im(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage+1)
            );

      END GENERATE parallel_cells;
    END GENERATE k10_blocks;
  END GENERATE stages_generation;



  -- output results

  -- purpose: selects cst_w_out_5ndft bits of the last boundary as outputs,
  -- after skipping the nb_bits_shift_round most significant bits. The bits
  -- below the selected window are dropped (truncation, no rounding) and the
  -- assignment is combinational: an earlier clocked version with
  -- round-to-nearest has been disabled.
  -- inputs : matrix_inputs_outputs_radix2_cells_re, matrix_inputs_outputs_radix2_cells_im
  -- outputs: o_data_re, o_data_im
  rounding : FOR i IN 0 TO cst_nb_samples_in_5ndft-1 GENERATE
    o_data_re(i) <= matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-nb_bits_shift_round-1 DOWNTO cst_dft_w_out_5ndft-nb_bits_shift_round-cst_w_out_5ndft);
    o_data_im(i) <= matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-nb_bits_shift_round-1 DOWNTO cst_dft_w_out_5ndft-nb_bits_shift_round-cst_w_out_5ndft);
  END GENERATE rounding;  -- i



  -- if you prefer the whole result, uncomment this section and comment the
  -- "rounding" generate above. You should also change the width of
  -- smpl_out_5ndft (FIVEn_DFT_PKG) from cst_w_out_5ndft to cst_dft_w_out_5ndft.
  --fill_outputs : FOR i IN 0 TO cst_nb_samples_in_5ndft-1 GENERATE
  --  o_data_re(i) <= matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5);
  --  o_data_im(i) <= matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5);
  --END GENERATE fill_outputs;


 END ARCHITECTURE instanciating_cells;
--- a/5ndft/mult_blk_5ndft.vhd
+++ b/5ndft/mult_blk_5ndft.vhd
@@ -0,0 +1,64 @@
 LIBRARY ieee;
 USE ieee.std_logic_1164.ALL;
 USE ieee.numeric_std.ALL;
 USE work.FIVEn_DFT_PKG.ALL;

 -- purpose : the multiplication of a complex sample by the complex
 -- exponential Wn (given as cos and sin), and the additions it requires, are
 -- delegated to this block
 ENTITY MULT_BLK_5nDFT IS
  GENERIC(
    w_in   : natural;                   -- number of significant (low) bits of i_data_re / i_data_im
    w_mult : natural                    -- bit width of the cos / sin coefficients
    );
  PORT(i_clk     : IN  std_logic;
       i_data_re : IN  smpl_out_radix2;  -- real part of the sample
       i_data_im : IN  smpl_out_radix2;  -- imaginary part of the sample
       i_cos     : IN  smpl_cos_sin_wb;  -- cos coefficient, two's complement
       i_sin     : IN  smpl_cos_sin_wb;  -- sin coefficient, two's complement
       o_data_re : OUT smpl_out_radix2 := (OTHERS => '0');  -- real part of the product, on w_in+w_mult+1 bits
       o_data_im : OUT smpl_out_radix2 := (OTHERS => '0')   -- imaginary part of the product, on w_in+w_mult+1 bits
       );
 END MULT_BLK_5nDFT;

 -- Two-register complex multiplier:
 --   o_data_re = re*cos - im*sin
 --   o_data_im = im*cos + re*sin
 -- The four partial products are registered on one clock edge and their
 -- sum / difference on the next one.
 ARCHITECTURE Mult_Path OF MULT_BLK_5nDFT IS

  CONSTANT c_w_prod : natural := w_in+w_mult;  -- width of one partial product
  CONSTANT c_w_sum  : natural := c_w_prod+1;   -- one extra bit for the add / subtract

  -- signed views of the inputs (only the w_in significant data bits are used)
  SIGNAL re_in  : signed(w_in-1 DOWNTO 0) := (OTHERS => '0');
  SIGNAL im_in  : signed(w_in-1 DOWNTO 0) := (OTHERS => '0');
  SIGNAL cos_in : smpl_cos_sin_signed_wb  := (OTHERS => '0');
  SIGNAL sin_in : smpl_cos_sin_signed_wb  := (OTHERS => '0');

  -- registered partial products
  SIGNAL prod_re_cos : signed(c_w_prod-1 DOWNTO 0) := (OTHERS => '0');  -- re*cos
  SIGNAL prod_im_cos : signed(c_w_prod-1 DOWNTO 0) := (OTHERS => '0');  -- im*cos
  SIGNAL prod_re_sin : signed(c_w_prod-1 DOWNTO 0) := (OTHERS => '0');  -- re*sin
  SIGNAL prod_im_sin : signed(c_w_prod-1 DOWNTO 0) := (OTHERS => '0');  -- im*sin

 BEGIN

  -- type conversions, done once ahead of the arithmetic
  re_in  <= signed(i_data_re(w_in-1 DOWNTO 0));
  im_in  <= signed(i_data_im(w_in-1 DOWNTO 0));
  cos_in <= signed(i_cos);
  sin_in <= signed(i_sin);

  -- purpose: registered multiply then registered add / subtract. A result is
  -- available two rising edges after its operands.
  -- inputs: re_in, im_in, cos_in, sin_in
  -- outputs: o_data_re, o_data_im (bits c_w_prod DOWNTO 0 only; the upper
  -- bits are never driven here)
  mult : PROCESS(i_clk)
  BEGIN
    IF rising_edge(i_clk) THEN
      -- first register level: the four real products
      prod_re_cos <= re_in * cos_in;
      prod_im_cos <= im_in * cos_in;
      prod_re_sin <= re_in * sin_in;
      prod_im_sin <= im_in * sin_in;

      -- second register level: operands are sign extended by one bit so that
      -- the sum / difference cannot overflow; the minus comes from i*i = -1
      o_data_re(c_w_prod DOWNTO 0) <= std_logic_vector(resize(prod_re_cos, c_w_sum) - resize(prod_im_sin, c_w_sum));
      o_data_im(c_w_prod DOWNTO 0) <= std_logic_vector(resize(prod_im_cos, c_w_sum) + resize(prod_re_sin, c_w_sum));
    END IF;

  END PROCESS mult;

 END Mult_Path;
--- a/5ndft/radix_2_cell.vhd
+++ b/5ndft/radix_2_cell.vhd
@@ -0,0 +1,97 @@
 LIBRARY ieee;
 USE ieee.std_logic_1164.ALL;
 USE ieee.std_logic_signed.ALL;
 USE ieee.numeric_std.ALL;
 USE work.FIVEn_DFT_PKG.ALL;

 -- One radix-2 butterfly: combines the complex sample 1 with the complex
 -- sample 2 multiplied by the twiddle factor given as (i_cos, i_sin).
 -- Output 1 is the sum, output 2 the difference.
 ENTITY radix_2_cell_winograd IS
  GENERIC(
    w_in : natural                      -- number of significant (low) bits of the data inputs
    );
  PORT(
    i_clk      :     std_logic;         -- clock (no mode given: defaults to IN)
    i_cos      :     smpl_cos_sin_wb;   -- twiddle cos (no mode given: defaults to IN)
    i_sin      :     smpl_cos_sin_wb;   -- twiddle sin (no mode given: defaults to IN)
    i_data1_re : IN  smpl_out_radix2;   -- sample 1 (not multiplied by the twiddle)
    i_data1_im : IN  smpl_out_radix2;
    i_data2_re : IN  smpl_out_radix2;   -- sample 2 (multiplied by the twiddle)
    i_data2_im : IN  smpl_out_radix2;
    o_data1_re : OUT smpl_out_radix2 := (OTHERS => '0');  -- sum branch
    o_data1_im : OUT smpl_out_radix2 := (OTHERS => '0');
    o_data2_re : OUT smpl_out_radix2 := (OTHERS => '0');  -- difference branch
    o_data2_im : OUT smpl_out_radix2 := (OTHERS => '0')
    );
 END radix_2_cell_winograd;

 -- Pipelined radix-2 butterfly.
 -- data_matrix_xx(branch)(point): branch 0 is the upper (sample 1) path,
 -- branch 1 the lower (sample 2) path; point 0 is the cell input, point 3
 -- the cell output.
 --   upper path: point 1 = input scaled by 2^(coefficient width - 2),
 --               point 2 = point 1 delayed by one clock
 --   lower path: point 2 = input multiplied by the twiddle (MULT_BLK_5nDFT,
 --               two clocks); point 1 is unused
 --   point 3   : upper + lower (branch 0) and upper - lower (branch 1)
 ARCHITECTURE radix2 OF radix_2_cell_winograd IS

  -- width of the two operands of the final add / subtract
  CONSTANT c_w_operand : natural := w_in+cst_w_precision_radix2_coeffs_5ndft+1;
  -- width of the butterfly results (one growth bit)
  CONSTANT c_w_result  : natural := c_w_operand+1;
  -- zeros appended to the upper-path sample so that it carries the same
  -- scaling as the lower-path product
  CONSTANT c_scale_padding : std_logic_vector(cst_w_precision_radix2_coeffs_5ndft-3 DOWNTO 0) := (OTHERS => '0');

  SIGNAL data_matrix_im : matrix_radix2_cell := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL data_matrix_re : matrix_radix2_cell := (OTHERS => (OTHERS => (OTHERS => '0')));

 BEGIN


  -- assign block inputs and outputs their corresponding matrix columns
  data_matrix_re(0)(0) <= i_data1_re;
  data_matrix_re(1)(0) <= i_data2_re;
  data_matrix_im(0)(0) <= i_data1_im;
  data_matrix_im(1)(0) <= i_data2_im;
  o_data1_re           <= data_matrix_re(0)(3);
  o_data2_re           <= data_matrix_re(1)(3);
  o_data1_im           <= data_matrix_im(0)(3);
  o_data2_im           <= data_matrix_im(1)(3);



  -- lower path: complex multiplication of sample 2 by the twiddle factor
  mult_inst1 : ENTITY work.MULT_BLK_5nDFT(Mult_Path)
    GENERIC MAP(
      w_in   => w_in,
      w_mult => cst_w_precision_radix2_coeffs_5ndft
      )
    PORT MAP(i_clk     => i_clk,
             i_data_re => data_matrix_re(1)(0),
             i_data_im => data_matrix_im(1)(0),
             i_cos     => i_cos,
             i_sin     => i_sin,
             o_data_re => data_matrix_re(1)(2),
             o_data_im => data_matrix_im(1)(2)
             );



  -- purpose: upper-path scaling and delay, then the final addition and
  -- subtraction. The multiplication by 1 of the upper path is replaced by
  -- appending zeros (a left shift) and sign extending by three bits.
  -- inputs: data_matrix_im(0)(0), data_matrix_re(0)(0), data_matrix_im(1)(2), data_matrix_re(1)(2)
  -- outputs: data_matrix_im(x)(3), data_matrix_re(x)(3)
  radix2_structure : PROCESS(i_clk)
  BEGIN
    IF(rising_edge(i_clk)) THEN
      -- mult per 1 (top): sign & sign & sign & sample & zeros
      data_matrix_im(0)(1)(c_w_operand-1 DOWNTO 0) <= data_matrix_im(0)(0)(w_in-1)&data_matrix_im(0)(0)(w_in-1)&data_matrix_im(0)(0)(w_in-1)&data_matrix_im(0)(0)(w_in-1 DOWNTO 0)&c_scale_padding;
      data_matrix_re(0)(1)(c_w_operand-1 DOWNTO 0) <= data_matrix_re(0)(0)(w_in-1)&data_matrix_re(0)(0)(w_in-1)&data_matrix_re(0)(0)(w_in-1)&data_matrix_re(0)(0)(w_in-1 DOWNTO 0)&c_scale_padding;
      -- extra delay so that the upper path matches the two clocks of the multiplier
      data_matrix_im(0)(2) <= data_matrix_im(0)(1);
      data_matrix_re(0)(2) <= data_matrix_re(0)(1);

      -- mult (down) : see mult_blk instanciation

      -- add / subtract: both operands are sign extended by one bit first
      data_matrix_re(0)(3)(c_w_result-1 DOWNTO 0) <= std_logic_vector(
        resize(signed(data_matrix_re(0)(2)(c_w_operand-1 DOWNTO 0)), c_w_result)
        + resize(signed(data_matrix_re(1)(2)(c_w_operand-1 DOWNTO 0)), c_w_result));
      data_matrix_re(1)(3)(c_w_result-1 DOWNTO 0) <= std_logic_vector(
        resize(signed(data_matrix_re(0)(2)(c_w_operand-1 DOWNTO 0)), c_w_result)
        - resize(signed(data_matrix_re(1)(2)(c_w_operand-1 DOWNTO 0)), c_w_result));

      data_matrix_im(0)(3)(c_w_result-1 DOWNTO 0) <= std_logic_vector(
        resize(signed(data_matrix_im(0)(2)(c_w_operand-1 DOWNTO 0)), c_w_result)
        + resize(signed(data_matrix_im(1)(2)(c_w_operand-1 DOWNTO 0)), c_w_result));
      data_matrix_im(1)(3)(c_w_result-1 DOWNTO 0) <= std_logic_vector(
        resize(signed(data_matrix_im(0)(2)(c_w_operand-1 DOWNTO 0)), c_w_result)
        - resize(signed(data_matrix_im(1)(2)(c_w_operand-1 DOWNTO 0)), c_w_result));
    END IF;

  END PROCESS;

 END radix2;
--- a/5ndft/winograd5.vhd
+++ b/5ndft/winograd5.vhd
@@ -0,0 +1,306 @@
 LIBRARY ieee;
 USE ieee.std_logic_1164.ALL;
 USE ieee.std_logic_signed.ALL;
 USE ieee.numeric_std.ALL;
 USE work.FIVEn_DFT_PKG.ALL;

 -- WINOGRAD5: clocked 5-sample block following the winograd5 schema implemented
 -- in architecture Behavioral. Real and imaginary parts travel on separate
 -- vectors of five samples each. The architecture registers the data in seven
 -- consecutive stages, so outputs correspond to the inputs sampled seven rising
 -- edges of i_clk earlier.
 ENTITY WINOGRAD5 IS
  PORT(
    -- clock; all pipeline registers update on its rising edge
    i_clk     : IN  std_logic;
    -- imaginary parts of the five input samples (bits cst_w_in_5ndft-1 DOWNTO 0 are used)
    i_data_im : IN  vect_input_winograd5_5ndft;
    -- real parts of the five input samples (bits cst_w_in_5ndft-1 DOWNTO 0 are used)
    i_data_re : IN  vect_input_winograd5_5ndft;
    -- imaginary parts of the five results, all bits '0' until driven by the pipeline
    o_data_im : OUT vect_output_winograd5_5ndft := (OTHERS => (OTHERS => '0'));
    -- real parts of the five results, all bits '0' until driven by the pipeline
    o_data_re : OUT vect_output_winograd5_5ndft := (OTHERS => (OTHERS => '0'))
    );
 END WINOGRAD5;

 ARCHITECTURE Behavioral OF WINOGRAD5 IS

  -- Full-precision (32-bit, two's complement) multiplier coefficients. With the
  -- 2^30 scaling noted below, the five literals decode, in order, to roughly
  -- -1.25, 0.559017, 0.363271, 1.538842 and 0.587785. The architecture body
  -- addresses them with indices 1 to 5.
  SIGNAL mult_factors              : vect_mult_factors_32b          := ("10110000000000000000000000000000", "00100011110001101110111100110111", "00010111001111111101011000011110", "01100010011111000110001000101111", "00100101100111100100011000001001");
  -- multipliers multiplied by 2^30
  -- Coefficients reduced to cst_w_precision_winograd5_coeffs_5ndft bits by the
  -- mult_coeffs_cut process; these are the values actually fed to the multipliers.
  SIGNAL mult_factor_w_multipliers : vect_mult_factor_w_multipliers := (OTHERS => (OTHERS => '0'));

  --SIGNAL matrix_stages_signed_im             : matrix_winograd5_generic_signed_stages                              := (OTHERS => (OTHERS => (OTHERS => '0')));
  --SIGNAL matrix_stages_signed_re             : matrix_winograd5_generic_signed_stages                              := (OTHERS => (OTHERS => (OTHERS => '0')));
  -- Pipeline storage: index (0) holds the inputs, (1) to (7) the registered
  -- stages, (7) being copied to the outputs.
  SIGNAL matrix_stages_im                    : matrix_winograd5_generic_stages                                     := (OTHERS => (OTHERS => (OTHERS => '0')));  --matrix(x)(y) = matrix(stage)(layer) form
  SIGNAL matrix_stages_re                    : matrix_winograd5_generic_stages                                     := (OTHERS => (OTHERS => (OTHERS => '0')));  -- matrix(x)(y) = matrix(stage)(layer) form
  -- All-zero vector of (coefficient width - 2) bits. Stage 4 appends it as LSBs
  -- to the un-multiplied data0 path so that path carries the same number of
  -- bits as the multiplier outputs.
  SIGNAL multiply_by_2_power_cst_w_precision : std_logic_vector(cst_w_precision_winograd5_coeffs_5ndft-3 DOWNTO 0) := (OTHERS => '0');
  --SIGNAL in1, in2        :    smpl_out_winograd5_5ndft;
  --SIGNAL isigned         :    smpl_out_winograd5_signed_5ndft;





  -- purpose: two's complement addition or subtraction with one bit of growth
  -- inputs : w_smpl   number of significant LSBs in in1 and in2
  --          sub      true => in1 - in2, false => in1 + in2
  --          in1, in2 operands; only bits w_smpl-1 DOWNTO 0 are read
  -- returns: a sample whose bits w_smpl DOWNTO 0 hold the (w_smpl+1)-bit
  --          result; every bit above w_smpl is '0'
  FUNCTION add_sub (
    w_smpl   : IN natural;
    sub      : IN boolean;
    in1, in2 : IN smpl_out_winograd5_5ndft)
    RETURN smpl_out_winograd5_5ndft IS
    VARIABLE lhs    : signed(w_smpl DOWNTO 0);
    VARIABLE rhs    : signed(w_smpl DOWNTO 0);
    VARIABLE result : smpl_out_winograd5_5ndft := (OTHERS => '0');
  BEGIN
    -- duplicate the sign bit so the sum/difference cannot overflow
    lhs := signed(in1(w_smpl-1) & in1(w_smpl-1 DOWNTO 0));
    rhs := signed(in2(w_smpl-1) & in2(w_smpl-1 DOWNTO 0));

    IF NOT sub THEN
      result(w_smpl DOWNTO 0) := std_logic_vector(lhs + rhs);
    ELSE
      result(w_smpl DOWNTO 0) := std_logic_vector(lhs - rhs);
    END IF;

    RETURN result;
  END FUNCTION;





  -- purpose: signed multiplication of a sample by a coefficient
  -- inputs : w_smpl   number of significant LSBs in input
  --          cmplx    true => the coefficient is negated before multiplying
  --          w_coeff  coefficient width; the product occupies w_smpl+w_coeff bits
  --          input    sample; only bits w_smpl-1 DOWNTO 0 are read
  --          coeff    two's complement coefficient
  -- returns: a sample whose bits w_smpl+w_coeff-1 DOWNTO 0 hold the product;
  --          every higher bit is '0'
  -- note   : the negation is done at the coefficient's own width, so a
  --          most-negative coefficient would wrap onto itself
  FUNCTION mult (
    w_smpl  : IN natural;
    cmplx   : IN boolean;
    w_coeff : IN natural;
    input   : IN smpl_out_winograd5_5ndft;
    coeff   : IN smpl_mult_factor_w_multipliers
    )
    RETURN std_logic_vector IS
    VARIABLE factor  : signed(coeff'LENGTH-1 DOWNTO 0);
    VARIABLE product : smpl_out_winograd5_signed_5ndft := (OTHERS => '0');
    VARIABLE result  : smpl_out_winograd5_5ndft;
  BEGIN  -- FUNCTION mult
    factor := signed(coeff);
    IF cmplx THEN
      factor := -factor;
    END IF;

    product(w_smpl+w_coeff-1 DOWNTO 0) := signed(input(w_smpl-1 DOWNTO 0)) * factor;

    result := std_logic_vector(product);
    RETURN result;
  END FUNCTION mult;





  -- purpose: forwards a sample to the next stage, widening it by one sign bit
  --          so it stays aligned with add_sub results of the same stage
  -- inputs : w_smpl   number of significant LSBs in smpl_in
  --          smpl_in  sample; only bits w_smpl-1 DOWNTO 0 are read
  -- returns: a sample whose bits w_smpl DOWNTO 0 hold the sign-extended value;
  --          every higher bit is '0'
  FUNCTION smpl_copy (
    w_smpl  : natural;
    smpl_in : smpl_out_winograd5_5ndft)
    RETURN std_logic_vector IS
    VARIABLE widened : smpl_out_winograd5_5ndft;
  BEGIN  -- FUNCTION copy
    widened                     := (OTHERS => '0');
    widened(w_smpl-1 DOWNTO 0)  := smpl_in(w_smpl-1 DOWNTO 0);
    widened(w_smpl)             := smpl_in(w_smpl-1);  -- replicated sign bit
    RETURN widened;
  END FUNCTION smpl_copy;


 BEGIN


  -- Ties the ports to both ends of the pipeline, one iteration per sample:
  -- stage 0 receives the cst_w_in_5ndft input bits, stage 7 feeds the outputs.
  fill_input_output_matrix_for : FOR smpl_idx IN 0 TO 4 GENERATE

    -- real part
    matrix_stages_re(0)(smpl_idx)(cst_w_in_5ndft-1 DOWNTO 0) <= i_data_re(smpl_idx)(cst_w_in_5ndft-1 DOWNTO 0);
    o_data_re(smpl_idx) <= matrix_stages_re(7)(smpl_idx);

    -- imaginary part
    matrix_stages_im(0)(smpl_idx)(cst_w_in_5ndft-1 DOWNTO 0) <= i_data_im(smpl_idx)(cst_w_in_5ndft-1 DOWNTO 0);
    o_data_im(smpl_idx) <= matrix_stages_im(7)(smpl_idx);

  END GENERATE fill_input_output_matrix_for;



  -- purpose: Reduces each 32-bit coefficient to its
  --          cst_w_precision_winograd5_coeffs_5ndft most significant bits,
  --          rounding up when the first discarded bit is '1'. mult_factors
  --          never changes, so this evaluates once at start-up.
  -- inputs : the full-precision coefficients mult_factors
  -- outputs: the reduced coefficients mult_factor_w_multipliers
  -- notes  : * The coefficient is padded with one '0' LSB before slicing, so
  --            the round-bit index stays inside the vector even when the
  --            precision is the full 32 bits (no rounding happens then).
  --          * Rounding saturates: if adding 1 would turn the largest positive
  --            value into the most negative one, the truncated value is kept.
  mult_coeffs_cut : PROCESS(mult_factors)
    CONSTANT c_w_coeff : natural := cst_w_precision_winograd5_coeffs_5ndft;
    VARIABLE padded    : std_logic_vector(32 DOWNTO 0);
    VARIABLE truncated : signed(c_w_coeff-1 DOWNTO 0);
    VARIABLE rounded   : signed(c_w_coeff-1 DOWNTO 0);
  BEGIN
    FOR i IN 1 TO 5 LOOP
      -- bit 32 is the sign bit, bit 0 is the padding bit
      padded    := mult_factors(i) & '0';
      truncated := signed(padded(32 DOWNTO 33-c_w_coeff));

      IF(padded(32-c_w_coeff) = '1') THEN
        rounded := truncated + 1;
        -- only a non-negative value can overflow when incremented
        IF NOT (truncated(truncated'HIGH) = '0' AND rounded(rounded'HIGH) = '1') THEN
          truncated := rounded;
        END IF;
      END IF;

      mult_factor_w_multipliers(i) <= std_logic_vector(truncated);
    END LOOP;  -- i
  END PROCESS mult_coeffs_cut;



  -- purpose: calculates each stage following the winograd5 schema.
  --          Stages 1 to 7 are all registered on the rising edge of i_clk and
  --          each one reads only the previous stage, so stage 7 holds the
  --          result for the data that was in stage 0 seven rising edges earlier.
  --          The same operations are applied to the real and imaginary
  --          matrices, except in stage 4 where data3/4/5 cross from one to the other.
  -- inputs:  matrix_stages_im(0), matrix_stages_re(0)
  -- outputs: matrix_stages_im(7), matrix_stages_re(7)
  -- note:    w_smpl is the number of significant bits of the stage being
  --          read; add_sub and smpl_copy both return one more bit than that.
  calculations_process : PROCESS(i_clk)
    VARIABLE w_smpl : natural;
  BEGIN

    IF(rising_edge(i_clk)) THEN

      -- stage 1
      -- operands are cst_w_in_5ndft bits wide (passed directly, w_smpl is not
      -- used here); results are cst_w_in_5ndft+1 bits wide
      --data0 = in0
      matrix_stages_im(1)(0) <= smpl_copy(w_smpl => cst_w_in_5ndft, smpl_in => matrix_stages_im(0)(0));
      matrix_stages_re(1)(0) <= smpl_copy(w_smpl => cst_w_in_5ndft, smpl_in => matrix_stages_re(0)(0));

      -- data1 = in3 + in2
      matrix_stages_im(1)(1) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_im(0)(3), in2 => matrix_stages_im(0)(2));
      matrix_stages_re(1)(1) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_re(0)(3), in2 => matrix_stages_re(0)(2));

      --data2 = in4 + in1
      matrix_stages_im(1)(2) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_im(0)(4), in2 => matrix_stages_im(0)(1));
      matrix_stages_re(1)(2) <= add_sub (w_smpl => cst_w_in_5ndft, sub => false, in1 => matrix_stages_re(0)(4), in2 => matrix_stages_re(0)(1));

      --data3 = in3 - in2
      matrix_stages_im(1)(3) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_im(0)(3), in2 => matrix_stages_im(0)(2));
      matrix_stages_re(1)(3) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_re(0)(3), in2 => matrix_stages_re(0)(2));

      --data4 = in1 - in4
      matrix_stages_im(1)(4) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_im(0)(1), in2 => matrix_stages_im(0)(4));
      matrix_stages_re(1)(4) <= add_sub (w_smpl => cst_w_in_5ndft, sub => true, in1 => matrix_stages_re(0)(1), in2 => matrix_stages_re(0)(4));





      -- stage 2 (reads stage 1, cst_w_in_5ndft+1 significant bits)
      w_smpl                 := cst_w_in_5ndft+1;
      -- data0,3,4 are forwarded unchanged (sign-extended by one bit)
      matrix_stages_im(2)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(0));
      matrix_stages_re(2)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(0));
      matrix_stages_im(2)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(3));
      matrix_stages_re(2)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(3));
      matrix_stages_im(2)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(1)(4));
      matrix_stages_re(2)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(1)(4));

      --data1 = stage1 data1 + stage1 data2
      matrix_stages_im(2)(1) <= add_sub (w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(1)(1), in2 => matrix_stages_im(1)(2));
      matrix_stages_re(2)(1) <= add_sub (w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(1)(1), in2 => matrix_stages_re(1)(2));

      --data2 = stage1 data2 - stage1 data1
      matrix_stages_re(2)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(1)(2), in2 => matrix_stages_re(1)(1));
      matrix_stages_im(2)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(1)(2), in2 => matrix_stages_im(1)(1));

      --data5 = stage1 data3 + stage1 data4 (a sixth layer appears from here on)
      matrix_stages_re(2)(5) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(1)(3), in2 => matrix_stages_re(1)(4));
      matrix_stages_im(2)(5) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(1)(3), in2 => matrix_stages_im(1)(4));





      -- stage 3 (reads stage 2, cst_w_in_5ndft+2 significant bits)
      w_smpl                 := cst_w_in_5ndft+2;
      --data1,2,3,4,5 are forwarded unchanged (sign-extended by one bit)
      matrix_stages_im(3)(1) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(1));
      matrix_stages_re(3)(1) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(1));
      matrix_stages_im(3)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(2));
      matrix_stages_re(3)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(2));
      matrix_stages_im(3)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(3));
      matrix_stages_re(3)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(3));
      matrix_stages_im(3)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(4));
      matrix_stages_re(3)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(4));
      matrix_stages_im(3)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(2)(5));
      matrix_stages_re(3)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(2)(5));

      --data0 = stage2 data1 + stage2 data0
      matrix_stages_im(3)(0) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(2)(1), in2 => matrix_stages_im(2)(0));
      matrix_stages_re(3)(0) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(2)(1), in2 => matrix_stages_re(2)(0));





      -- stage 4, multiply (reads stage 3, cst_w_in_5ndft+3 significant bits;
      -- every result is cst_w_in_5ndft+3+coefficient width bits wide)
      w_smpl                                                                           := cst_w_in_5ndft+3;
      --data0, multiply by 1 = copy: two extra sign bits on top and
      --(coefficient width - 2) zero bits below, giving the same width as a product
      matrix_stages_im(4)(0)(w_smpl+cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0) <= matrix_stages_im(3)(0)(w_smpl-1)&matrix_stages_im(3)(0)(w_smpl-1)&matrix_stages_im(3)(0)(w_smpl-1 DOWNTO 0)&multiply_by_2_power_cst_w_precision;
      matrix_stages_re(4)(0)(w_smpl+cst_w_precision_winograd5_coeffs_5ndft-1 DOWNTO 0) <= matrix_stages_re(3)(0)(w_smpl-1)&matrix_stages_re(3)(0)(w_smpl-1)&matrix_stages_re(3)(0)(w_smpl-1 DOWNTO 0)&multiply_by_2_power_cst_w_precision;

      --data1 = data1 * coefficient 1
      matrix_stages_im(4)(1) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(1), coeff => mult_factor_w_multipliers(1));
      matrix_stages_re(4)(1) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(1), coeff => mult_factor_w_multipliers(1));

      --data2 = data2 * coefficient 2
      matrix_stages_im(4)(2) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(2), coeff => mult_factor_w_multipliers(2));
      matrix_stages_re(4)(2) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(2), coeff => mult_factor_w_multipliers(2));

      --data3: parts are crossed, new re = -(im * coefficient 3), new im = re * coefficient 3
      matrix_stages_re(4)(3) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(3), coeff => mult_factor_w_multipliers(3));
      matrix_stages_im(4)(3) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(3), coeff => mult_factor_w_multipliers(3));

      --data4: parts are crossed, new re = -(im * coefficient 4), new im = re * coefficient 4
      matrix_stages_re(4)(4) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(4), coeff => mult_factor_w_multipliers(4));
      matrix_stages_im(4)(4) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(4), coeff => mult_factor_w_multipliers(4));

      --data5: parts are crossed, new re = -(im * coefficient 5), new im = re * coefficient 5
      matrix_stages_re(4)(5) <= mult (w_smpl => w_smpl, cmplx => true, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_im(3)(5), coeff => mult_factor_w_multipliers(5));
      matrix_stages_im(4)(5) <= mult (w_smpl => w_smpl, cmplx => false, w_coeff => cst_w_precision_winograd5_coeffs_5ndft, input => matrix_stages_re(3)(5), coeff => mult_factor_w_multipliers(5));





      -- stage 5 (reads stage 4)
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft;
      --data0, 2, 3, 4, 5 are forwarded unchanged (sign-extended by one bit)
      matrix_stages_im(5)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(0));
      matrix_stages_re(5)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(0));
      matrix_stages_im(5)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(2));
      matrix_stages_re(5)(2) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(2));
      matrix_stages_im(5)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(3));
      matrix_stages_re(5)(3) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(3));
      matrix_stages_im(5)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(4));
      matrix_stages_re(5)(4) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(4));
      matrix_stages_im(5)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(4)(5));
      matrix_stages_re(5)(5) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(4)(5));

      --data1 = stage4 data1 + stage4 data0
      matrix_stages_re(5)(1) <= add_sub(w_smpl => w_smpl, sub => false, IN1 => matrix_stages_re(4)(1), in2 => matrix_stages_re(4)(0));
      matrix_stages_im(5)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(4)(1), in2 => matrix_stages_im(4)(0));





      -- stage 6 (reads stage 5; data5 is consumed here and not forwarded)
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft+1;
      --data0 is forwarded unchanged (sign-extended by one bit)
      matrix_stages_im(6)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(5)(0));
      matrix_stages_re(6)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(5)(0));

      --data1 = stage5 data1 - stage5 data2
      matrix_stages_re(6)(1) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(5)(1), in2 => matrix_stages_re(5)(2));
      matrix_stages_im(6)(1) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(5)(1), in2 => matrix_stages_im(5)(2));

      --data2 = stage5 data1 + stage5 data2
      matrix_stages_re(6)(2) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(5)(1), in2 => matrix_stages_re(5)(2));
      matrix_stages_im(6)(2) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(5)(1), in2 => matrix_stages_im(5)(2));

      --data3 = stage5 data5 + stage5 data3
      matrix_stages_re(6)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(5)(5), in2 => matrix_stages_re(5)(3));
      matrix_stages_im(6)(3) <= add_sub(w_smpl => w_smpl, sub => false, IN1 => matrix_stages_im(5)(5), in2 => matrix_stages_im(5)(3));

      --data4 = stage5 data5 - stage5 data4
      matrix_stages_re(6)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(5)(5), in2 => matrix_stages_re(5)(4));
      matrix_stages_im(6)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(5)(5), in2 => matrix_stages_im(5)(4));





      -- stage 7 (reads stage 6; results are copied to o_data_re / o_data_im)
      w_smpl                 := cst_w_in_5ndft+3+cst_w_precision_winograd5_coeffs_5ndft+2;
      --data0 is forwarded unchanged (sign-extended by one bit)
      matrix_stages_im(7)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_im(6)(0));
      matrix_stages_re(7)(0) <= smpl_copy(w_smpl => w_smpl, smpl_in => matrix_stages_re(6)(0));

      --data1 = stage6 data2 + stage6 data4
      matrix_stages_re(7)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(6)(2), in2 => matrix_stages_re(6)(4));
      matrix_stages_im(7)(1) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(6)(2), in2 => matrix_stages_im(6)(4));

      --data2 = stage6 data1 - stage6 data3
      matrix_stages_re(7)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(6)(1), in2 => matrix_stages_re(6)(3));
      matrix_stages_im(7)(2) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(6)(1), in2 => matrix_stages_im(6)(3));

      --data3 = stage6 data1 + stage6 data3
      matrix_stages_re(7)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_re(6)(1), in2 => matrix_stages_re(6)(3));
      matrix_stages_im(7)(3) <= add_sub(w_smpl => w_smpl, sub => false, in1 => matrix_stages_im(6)(1), in2 => matrix_stages_im(6)(3));

      --data4 = stage6 data2 - stage6 data4
      matrix_stages_re(7)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_re(6)(2), in2 => matrix_stages_re(6)(4));
      matrix_stages_im(7)(4) <= add_sub(w_smpl => w_smpl, sub => true, in1 => matrix_stages_im(6)(2), in2 => matrix_stages_im(6)(4));

    END IF;
  END PROCESS calculations_process;


 END Behavioral;