LIBRARY ieee;
USE ieee.std_logic_1164.ALL;
USE ieee.std_logic_signed.ALL;
USE ieee.numeric_std.ALL;
USE work.FIVEn_DFT_PKG.ALL;
USE work.coeff.ALL;

-- FFT_tree: a bank of parallel 5-point Winograd DFT cells followed by
-- radix-2 butterfly stages. All inputs and outputs are presented in parallel
-- as separate real / imaginary vectors.
ENTITY FFT_tree IS
  PORT(
    i_clk     : IN  std_logic;
    i_data_re : IN  vect_dft_input;
    i_data_im : IN  vect_dft_input;
    o_data_re : OUT vect_dft_output;
    o_data_im : OUT vect_dft_output
    );
END FFT_tree;

ARCHITECTURE instanciating_cells OF FFT_tree IS

  -- Twiddle tables for the first butterfly stage, coeffs multiplied by 2^30.
  -- They are never driven, so they are declared as constants rather than as
  -- signals relying on their initial value.
  CONSTANT cos_k_pi_over_5 : vect_cos_sin_k_pi_over_5_32b :=
    ("01000000000000000000000000000000",
     "00110011110001101110111100110111",
     "00010011110001101110111100110111",
     "11101100001110010001000011001001",
     "11001100001110010001000011001001");
  -- Entries 1 to 4 are negative two's-complement values, i.e. the table
  -- stores -sin(k*pi/5) scaled by 2^30.
  CONSTANT sin_k_pi_over_5 : vect_cos_sin_k_pi_over_5_32b :=
    ("00000000000000000000000000000000",
     "11011010011000011011100111110111",
     "11000011001000011110001111011001",
     "11000011001000011110001111011001",
     "11011010011000011011100111110111");

  -- purpose: round a truncated two's-complement value to nearest (half up).
  -- inputs : kept      - the MSBs that are kept after truncation
  --          round_bit - the most significant discarded bit
  -- output : kept + 1 when round_bit = '1', kept otherwise. If the increment
  --          would overflow (kept is already the largest positive value) the
  --          result saturates to kept instead of wrapping to the most
  --          negative value.
  FUNCTION round_half_up (
    kept      : IN std_logic_vector;
    round_bit : IN std_logic)
    RETURN std_logic_vector IS
    -- one extra bit so that the overflow of the increment can be observed
    VARIABLE widened : signed(kept'length DOWNTO 0);
  BEGIN  -- FUNCTION round_half_up
    IF round_bit /= '1' THEN
      RETURN kept;
    END IF;
    widened := resize(signed(kept), kept'length+1) + 1;
    -- the two top bits differ only when the result no longer fits
    IF widened(kept'length) /= widened(kept'length-1) THEN
      RETURN kept;
    END IF;
    RETURN std_logic_vector(widened(kept'length-1 DOWNTO 0));
  END FUNCTION round_half_up;

  -- purpose: give the proper arrangement for the winograd5 blocks.
  -- The right order is: even numbers then odd numbers. At each stage the even
  -- half is the previous stage's order multiplied by 2, the odd half is the
  -- previous stage's order multiplied by 2, plus 1.
  -- input : i - the winograd5 instance number in natural order
  -- output: the corresponding winograd5 instance number once rearranged
  --         (0 when cst_log2_nb_parallel_winograd5 = 0)
  FUNCTION rearrange (
    i : IN natural)
    RETURN natural IS
    TYPE vect_rearrange IS ARRAY (0 TO cst_nb_samples_in_5ndft) OF natural;
    TYPE matrix_rearrange IS ARRAY (0 TO cst_log2_nb_parallel_winograd5) OF vect_rearrange;
    VARIABLE matrix_affect_rearrange : matrix_rearrange := (OTHERS => (OTHERS => 0));
  BEGIN  -- FUNCTION rearrange
    IF cst_log2_nb_parallel_winograd5 > 0 THEN
      -- stage 1 order is (0, 1); entry 0 is already 0 from the initialisation
      matrix_affect_rearrange(1)(1) := 1;
      FOR stage IN 2 TO cst_log2_nb_parallel_winograd5 LOOP
        -- loop index deliberately not named "i" so that it does not hide the
        -- function parameter
        FOR idx IN 0 TO 2**(stage-1)-1 LOOP
          matrix_affect_rearrange(stage)(idx)                := matrix_affect_rearrange(stage-1)(idx)*2;
          matrix_affect_rearrange(stage)(idx+2**(stage-1)) := matrix_affect_rearrange(stage-1)(idx)*2+1;
        END LOOP;  -- idx
      END LOOP;  -- stage
      RETURN matrix_affect_rearrange(cst_log2_nb_parallel_winograd5)(i);
    ELSE
      RETURN 0;
    END IF;
  END FUNCTION rearrange;

  TYPE matrix_input_winograd5_5ndft IS ARRAY (0 TO cst_nb_parallel_winograd5-1) OF vect_input_winograd5_5ndft;
  TYPE matrix_output_winograd5_5ndft IS ARRAY (0 TO cst_nb_parallel_winograd5-1) OF vect_output_winograd5_5ndft;

  -- coefficients reduced to cst_w_precision_radix2_coeffs_5ndft bits
  SIGNAL cos_k_pi_over_5_wb : vect_cos_sin_k_pi_over_5_wb := (OTHERS => (OTHERS => '0'));
  SIGNAL sin_k_pi_over_5_wb : vect_cos_sin_k_pi_over_5_wb := (OTHERS => (OTHERS => '0'));

  -- flattened outputs of all the winograd5 cells
  SIGNAL data_out_winograd5_re : vect_total_output_winograd5_cells := (OTHERS => (OTHERS => '0'));
  SIGNAL data_out_winograd5_im : vect_total_output_winograd5_cells := (OTHERS => (OTHERS => '0'));

  -- inputs once reordered for the winograd5 cells
  SIGNAL winograd_rearranged_input_re : vect_dft_input := (OTHERS => (OTHERS => '0'));
  SIGNAL winograd_rearranged_input_im : vect_dft_input := (OTHERS => (OTHERS => '0'));

  -- indexed (sample)(stage); stage 0 holds the winograd5 results
  SIGNAL matrix_inputs_outputs_radix2_cells_re : matrix_fft_stages := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL matrix_inputs_outputs_radix2_cells_im : matrix_fft_stages := (OTHERS => (OTHERS => (OTHERS => '0')));

  -- per-cell input / output vectors of the winograd5 instances
  SIGNAL vect_input_1_cell_winograd5_re  : matrix_input_winograd5_5ndft  := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_input_1_cell_winograd5_im  : matrix_input_winograd5_5ndft  := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_output_1_cell_winograd5_re : matrix_output_winograd5_5ndft := (OTHERS => (OTHERS => (OTHERS => '0')));
  SIGNAL vect_output_1_cell_winograd5_im : matrix_output_winograd5_5ndft := (OTHERS => (OTHERS => (OTHERS => '0')));

BEGIN  -- ARCHITECTURE instanciating_cells

  -- coeffs cut
  -- purpose: rounds the 32-bit coefficients to their
  -- cst_w_precision_radix2_coeffs_5ndft most significant bits (round to
  -- nearest on the first discarded bit).
  -- inputs : the coeffs to be cut: sin_k_pi_over_5, cos_k_pi_over_5
  -- outputs: the cut coeffs: cos_k_pi_over_5_wb, sin_k_pi_over_5_wb
  mult_coeffs_cut : FOR k IN 0 TO 4 GENERATE
    cos_k_pi_over_5_wb(k) <= round_half_up(
      cos_k_pi_over_5(k)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft),
      cos_k_pi_over_5(k)(32-cst_w_precision_radix2_coeffs_5ndft-1));
    sin_k_pi_over_5_wb(k) <= round_half_up(
      sin_k_pi_over_5(k)(31 DOWNTO 32-cst_w_precision_radix2_coeffs_5ndft),
      sin_k_pi_over_5(k)(32-cst_w_precision_radix2_coeffs_5ndft-1));
  END GENERATE mult_coeffs_cut;

  -- inputs rearranging
  -- purpose: assigns the inputs to their right position (cf. schema and
  -- explanations) using the rearrange(i) function, which creates the right
  -- winograd instance number order. Rearranging the inputs order is
  -- equivalent to rearranging the winograd5 order.
  -- inputs : i_data_re, i_data_im
  -- outputs: winograd_rearranged_input_re, winograd_rearranged_input_im
  rearrange_winograd_input : FOR i IN 0 TO cst_nb_parallel_winograd5-1 GENERATE
    five_inputs : FOR j IN 0 TO 4 GENERATE
      winograd_rearranged_input_re(rearrange(i)*5+j) <= i_data_re(j*cst_nb_parallel_winograd5+i);
      winograd_rearranged_input_im(rearrange(i)*5+j) <= i_data_im(j*cst_nb_parallel_winograd5+i);
    END GENERATE five_inputs;
  END GENERATE rearrange_winograd_input;

  -- winograd instantiation
  -- purpose: wiring; instantiates the parallel winograd stage with its
  -- correct inputs and outputs
  -- inputs : winograd_rearranged_input_im, winograd_rearranged_input_re
  --          (length cst_nb_samples_in_5ndft each, cut into subvectors of 5)
  -- instances: WINOGRAD5(Behavioral)
  -- outputs: vect_output_1_cell_winograd5_im, vect_output_1_cell_winograd5_re,
  --          flattened into data_out_winograd5_im, data_out_winograd5_re
  winograd5_instances : FOR i IN 0 TO cst_nb_parallel_winograd5-1 GENERATE
    fill_for : FOR j IN 0 TO 4 GENERATE
      vect_input_1_cell_winograd5_im(i)(j) <= winograd_rearranged_input_im(j+5*i);
      vect_input_1_cell_winograd5_re(i)(j) <= winograd_rearranged_input_re(j+5*i);
      data_out_winograd5_im(j+5*i)         <= vect_output_1_cell_winograd5_im(i)(j);
      data_out_winograd5_re(j+5*i)         <= vect_output_1_cell_winograd5_re(i)(j);
    END GENERATE fill_for;
    winograd5_inst : ENTITY work.WINOGRAD5(Behavioral)
      PORT MAP(
        i_clk     => i_clk,
        i_data_im => vect_input_1_cell_winograd5_im(i),
        i_data_re => vect_input_1_cell_winograd5_re(i),
        o_data_im => vect_output_1_cell_winograd5_im(i),
        o_data_re => vect_output_1_cell_winograd5_re(i)
        );
  END GENERATE winograd5_instances;

  -- winograd results
  -- purpose: fill the 0th stage (input stage) of the matrix connecting the
  -- butterfly (radix2) cells. Only the cst_winograd5_w_out_5ndft low bits of
  -- stage 0 are driven here.
  -- inputs : data_out_winograd5_re, data_out_winograd5_im
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(0), matrix_inputs_outputs_radix2_cells_im(x)(0)
  fill_radix2_cells_input_matrix : FOR i IN 0 TO cst_nb_samples_in_5ndft-1 GENERATE
    matrix_inputs_outputs_radix2_cells_re(i)(0)(cst_winograd5_w_out_5ndft-1 DOWNTO 0) <= data_out_winograd5_re(i);
    matrix_inputs_outputs_radix2_cells_im(i)(0)(cst_winograd5_w_out_5ndft-1 DOWNTO 0) <= data_out_winograd5_im(i);
  END GENERATE fill_radix2_cells_input_matrix;

  -- purpose: instantiates the first butterfly cells stage (after the winograd
  -- results). If there is only 1 parallel winograd5 cell, the range of
  -- couple_winograd5_nb is 0 TO -1, so nothing is generated.
  -- inputs : matrix_inputs_outputs_radix2_cells_re(x)(0), matrix_inputs_outputs_radix2_cells_im(x)(0)
  -- instances: radix_2_cell_winograd(radix2)
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(1), matrix_inputs_outputs_radix2_cells_im(x)(1)
  radix2_cells_stage1 : FOR couple_winograd5_nb IN 0 TO cst_nb_parallel_winograd5/2-1 GENERATE
    radix2_winograd_out : FOR i IN 0 TO 4 GENERATE
      radix_2_out_winograd_inst : ENTITY work.radix_2_cell_winograd(radix2)
        GENERIC MAP(
          w_in => cst_winograd5_w_out_5ndft
          )
        PORT MAP(
          i_clk      => i_clk,
          -- i is already within 0 TO 4, no modulo needed
          i_cos      => cos_k_pi_over_5_wb(i),
          i_sin      => sin_k_pi_over_5_wb(i),
          i_data1_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i)(0),
          i_data1_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i)(0),
          i_data2_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i+5)(0),
          i_data2_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i+5)(0),
          o_data1_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i)(1),
          o_data1_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i)(1),
          o_data2_re => matrix_inputs_outputs_radix2_cells_re(couple_winograd5_nb*10+i+5)(1),
          o_data2_im => matrix_inputs_outputs_radix2_cells_im(couple_winograd5_nb*10+i+5)(1)
          );
    END GENERATE radix2_winograd_out;
  END GENERATE radix2_cells_stage1;

  -- radix2 cells instantiation
  -- purpose: instantiates all the butterfly stages (except stage 1). The
  -- generate index counts down through the data stages: generate index
  -- "stage" reads matrix column cst_log2_nb_parallel_winograd5-stage and
  -- writes column cst_log2_nb_parallel_winograd5-stage+1.
  -- inputs : matrix_inputs_outputs_radix2_cells_re(x)(1), matrix_inputs_outputs_radix2_cells_im(x)(1)
  -- instances: radix_2_cell_winograd(radix2)
  -- outputs: matrix_inputs_outputs_radix2_cells_re(x)(cst_log2_nb_parallel_winograd5),
  --          matrix_inputs_outputs_radix2_cells_im(x)(cst_log2_nb_parallel_winograd5)
  stages_generation : FOR stage IN 1 TO cst_log2_nb_parallel_winograd5-1 GENERATE
    k10_blocks : FOR cell_10_nb IN 1 TO 2**(stage-1) GENERATE
      parallel_cells : FOR cell_nb IN (cst_nb_samples_in_5ndft/(2**stage))*(cell_10_nb-1) TO (cst_nb_samples_in_5ndft/(2**stage))*(cell_10_nb)-1 GENERATE
        radix_2_generic_inst : ENTITY work.radix_2_cell_winograd(radix2)
          GENERIC MAP(
            w_in => cst_winograd5_w_out_5ndft+(cst_log2_nb_parallel_winograd5-stage)*cst_w_radix2_added
            )
          PORT MAP(
            i_clk      => i_clk,
            i_cos      => cos_k_pi_over_n_wb((cst_nb_wn_coeffs/10/(2**(cst_log2_nb_parallel_winograd5-stage-1))*cell_nb) MOD cst_nb_wn_coeffs),
            i_sin      => sin_k_pi_over_n_wb((cst_nb_wn_coeffs/10/(2**(cst_log2_nb_parallel_winograd5-stage-1))*cell_nb) MOD cst_nb_wn_coeffs),
            i_data1_re => matrix_inputs_outputs_radix2_cells_re(cell_nb)(cst_log2_nb_parallel_winograd5-stage),
            i_data1_im => matrix_inputs_outputs_radix2_cells_im(cell_nb)(cst_log2_nb_parallel_winograd5-stage),
            i_data2_re => matrix_inputs_outputs_radix2_cells_re(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage),
            i_data2_im => matrix_inputs_outputs_radix2_cells_im(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage),
            o_data1_re => matrix_inputs_outputs_radix2_cells_re(cell_nb)(cst_log2_nb_parallel_winograd5-stage+1),
            o_data1_im => matrix_inputs_outputs_radix2_cells_im(cell_nb)(cst_log2_nb_parallel_winograd5-stage+1),
            o_data2_re => matrix_inputs_outputs_radix2_cells_re(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage+1),
            o_data2_im => matrix_inputs_outputs_radix2_cells_im(cell_nb+(cst_nb_samples_in_5ndft/(2**stage)))(cst_log2_nb_parallel_winograd5-stage+1)
            );
      END GENERATE parallel_cells;
    END GENERATE k10_blocks;
  END GENERATE stages_generation;

  -- output results
  -- purpose: rounds the outputs to have cst_w_out_5ndft bits. The rounding
  -- increment is only applied when cst_w_out_5ndft < cst_dft_w_out_5ndft, and
  -- saturates instead of wrapping when the kept value is already the largest
  -- positive one.
  -- NOTE(review): the kept window starts at bit cst_dft_w_out_5ndft-24, not at
  -- the MSB; the 24/23 offsets look tuned for one configuration -- confirm
  -- before changing the widths in FIVEn_DFT_PKG.
  -- type   : sequential
  -- inputs : i_clk, matrix_inputs_outputs_radix2_cells_re, matrix_inputs_outputs_radix2_cells_im
  -- outputs: o_data_re, o_data_im
  output_rounding : PROCESS (i_clk) IS
  BEGIN  -- PROCESS output_rounding
    IF rising_edge(i_clk) THEN          -- rising clock edge
      FOR i IN 0 TO cst_nb_samples_in_5ndft-1 LOOP
        IF cst_w_out_5ndft < cst_dft_w_out_5ndft THEN
          o_data_re(i) <= round_half_up(
            matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24 DOWNTO cst_dft_w_out_5ndft-23-cst_w_out_5ndft),
            matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24-cst_w_out_5ndft));
          o_data_im(i) <= round_half_up(
            matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24 DOWNTO cst_dft_w_out_5ndft-23-cst_w_out_5ndft),
            matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24-cst_w_out_5ndft));
        ELSE
          o_data_re(i) <= matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24 DOWNTO cst_dft_w_out_5ndft-23-cst_w_out_5ndft);
          o_data_im(i) <= matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5)(cst_dft_w_out_5ndft-24 DOWNTO cst_dft_w_out_5ndft-23-cst_w_out_5ndft);
        END IF;
      END LOOP;  -- i
    END IF;
  END PROCESS output_rounding;

  -- if you prefer the whole result, uncomment this section and comment the
  -- output_rounding process above. You should also change smpl_out_5ndft from
  -- cst_w_out_5ndft to cst_dft_w_out_5ndft (line 32 in FIVEn_dft_pkg).
  --fill_outputs : FOR i IN 0 TO cst_nb_samples_in_5ndft-1 GENERATE
  --  o_data_re(i) <= matrix_inputs_outputs_radix2_cells_re(i)(cst_log2_nb_parallel_winograd5);
  --  o_data_im(i) <= matrix_inputs_outputs_radix2_cells_im(i)(cst_log2_nb_parallel_winograd5);
  --END GENERATE fill_outputs;

END ARCHITECTURE instanciating_cells;