/*
 *  Quadbike 2
 *  Copyright (C) 2026 'Diminished'

 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.

 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.

 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#ifndef QB_FIR_VEC2_H
#define QB_FIR_VEC2_H

#include "build.h"

#if defined QB_VECTORS_GCC_CLANG || defined QB_VECTORS_MSVC_AVX2 || defined QB_VECTORS_MSVC_AVX512

#include "qb_types.h"
#include "fir.h"
#include "vector.h"

/*
  BEWARE:
  Histories and taps have now been downgraded to single-precision
  floating-point from double-precision. This is mainly to avoid a
  situation where 8-wide vectorised input has to be re-swizzled
  into 4-wide in order to use explicit double-precision AVX2 intrinsics.
  
  Downgrading the filters to single-precision avoids having to
  perform this reformatting of the data, and should be twice as
  fast in vector form, but obviously there is a risk that the reduced
  precision could degrade the accuracy of the FIRs (an FIR has no
  feedback path, so the concern is rounding error in the taps and in
  the accumulated sum rather than instability as such). This does not
  seem to be a problem, but it is important to keep in mind.
  
  As a concession, the scalar version of this filter in fir.c/.h
  remains at double-precision, so if problems do occur down the line,
  it is possible to use the scalar version of the code instead to
  guarantee double-precision filtering, at the obvious cost of a
  lot of performance.
*/

/*
  State for one vectorised FIR filter.

  Both histories[] and taps[] hold one qb_vec_f_t per tap slot, so a
  single slot carries that tap position for several parallel streams
  at once. Only the first num_taps slots are meaningful; the arrays are
  sized for the largest supported filter (QB_FIR_MAX_TAPS).
*/
typedef struct qb_fir_vec_s {
  // each element of this contains parallel histories:
  qb_vec_f_t histories[QB_FIR_MAX_TAPS]; // ring buffer
  u32_t last_index;  // index into ring buffer
  u32_t num_taps;    // BANDPASS_xK_NUM_TAPS
  // parallel sets of taps; all identical; will need to be populated this way on init
  qb_vec_f_t taps[QB_FIR_MAX_TAPS];
  // NOTE(review): presumably the filter's delay in samples; units and
  // sign convention are not visible from this header -- confirm in the .c file
  s32_t delay;
} qb_fir_vec_t;

/*
  Initialise a vectorised FIR filter state.

  f    : filter state to set up.
  type : NOTE(review): presumably selects which filter (tap set) to load;
         the valid values are not visible from this header -- confirm.
  rate : NOTE(review): presumably selects the sample-rate variant of the
         taps -- confirm against the implementation.

  Returns a qb_err_t status code.
*/
qb_err_t qb_fir_vec_init (qb_fir_vec_t *f, u8_t type, u8_t rate);

/*
  Run a vectorised FIR filter over a vector buffer.

  inout            : buffer to filter. NOTE(review): the name suggests it is
                     filtered in place (input overwritten by output) -- confirm.
  fir              : filter state, as set up by qb_fir_vec_init().
  display_progress : NOTE(review): presumably non-zero enables progress
                     output while filtering -- confirm.

  No status is returned.
*/
void qb_fir_vec_run (qb_vec_buf_t *inout, qb_fir_vec_t *fir, u8_t display_progress);

/*
  Generate a PLL carrier from vectorised input.

  NOTE(review): everything below beyond the parameter names and the
  existing inline comments is inferred and should be confirmed against
  the implementation.

  buf_v               : input vector buffer.
  squared             : vector buffer; presumably holds (or receives) the
                        squared signal -- confirm direction.
  phase_shift_smps    : presumably a phase shift expressed in samples.
  phase_ix            : presumably an index identifying the phase being
                        processed.
  clipped_carrier_out : pointer to a float pointer; presumably receives the
                        scalar (non-vector) clipped carrier, and ownership
                        of any allocation is not visible here.
  bp_2k4_vec_p        : vectorised FIR state; name suggests the 2400 Hz
                        bandpass filter.
  display_progress    : presumably non-zero enables progress output.
  inspect             : inspection context; purpose not visible here.

  Returns a qb_err_t status code.
*/
qb_err_t qb_generate_pll_carrier_vector  (qb_vec_buf_t *buf_v,             // input
                                          qb_vec_buf_t *squared,
                                          //s64_t filter_delay,
                                          s8_t phase_shift_smps,
                                          u8_t phase_ix,
//                                          float clipping_level,        // 0.0 means ultra clip, 1.0 means no clip
                                          float **clipped_carrier_out, // FIXME: if PLL is vectorised, this will need to be a vecbuf instead
                                          qb_fir_vec_t *bp_2k4_vec_p,
                                          u8_t display_progress,
                                          qb_inspect_t *inspect) ;

#endif // QB_VECTORS_GCC_CLANG || QB_VECTORS_MSVC_AVX2 || QB_VECTORS_MSVC_AVX512

#endif // QB_FIR_VEC2_H
