UHSDR/UHSDR-active-devel/mchf-eclipse/support/codec2-dev.patch

Index: codec2-dev/src/fdmdv.c
===================================================================
--- codec2-dev/src/fdmdv.c	(revision 2846)
+++ codec2-dev/src/fdmdv.c	(working copy)
@@ -50,11 +50,21 @@
 #include "machdep.h"
 
 static int sync_uw[] = {1,-1,1,-1,1,-1};
-
 #ifdef __EMBEDDED__
 #define printf gdb_stdio_printf
 #endif
 
+#ifndef ARM_MATH_CM4
+  #define SINF(a) sinf(a)
+  #define COSF(a) cosf(a)
+#else
+  #define SINF(a) arm_sin_f32(a)
+  #define COSF(a) arm_cos_f32(a)
+#endif
+
+static const COMP  pi_on_4 = { .70710678118654752439, .70710678118654752439 }; // COSF(PI/4) , SINF(PI/4)
+
+
 /*--------------------------------------------------------------------------* \
 
   FUNCTION....: fdmdv_create
@@ -110,8 +120,8 @@
            This helped PAPR for a few dB.  We don't need to adjust rx
            phase as DQPSK takes care of that. */
 
-	f->phase_tx[c].real = cosf(2.0*PI*c/(Nc+1));
- 	f->phase_tx[c].imag = sinf(2.0*PI*c/(Nc+1));
+	f->phase_tx[c].real = COSF(2.0*PI*c/(Nc+1));
+ 	f->phase_tx[c].imag = SINF(2.0*PI*c/(Nc+1));
 
 	f->phase_rx[c].real = 1.0;
  	f->phase_rx[c].imag = 0.0;
@@ -124,12 +134,12 @@
     f->prev_tx_symbols[Nc].real = 2.0;
 
     fdmdv_set_fsep(f, FSEP);
-    f->freq[Nc].real = cosf(2.0*PI*0.0/FS);
-    f->freq[Nc].imag = sinf(2.0*PI*0.0/FS);
+    f->freq[Nc].real = COSF(2.0*PI*0.0/FS);
+    f->freq[Nc].imag = SINF(2.0*PI*0.0/FS);
     f->freq_pol[Nc]  = 2.0*PI*0.0/FS;
 
-    f->fbb_rect.real     = cosf(2.0*PI*FDMDV_FCENTRE/FS);
-    f->fbb_rect.imag     = sinf(2.0*PI*FDMDV_FCENTRE/FS);
+    f->fbb_rect.real     = COSF(2.0*PI*FDMDV_FCENTRE/FS);
+    f->fbb_rect.imag     = SINF(2.0*PI*FDMDV_FCENTRE/FS);
     f->fbb_pol           = 2.0*PI*FDMDV_FCENTRE/FS;
     f->fbb_phase_tx.real = 1.0;
     f->fbb_phase_tx.imag = 0.0;
@@ -257,15 +267,15 @@
 
     for(c=0; c<f->Nc/2; c++) {
 	carrier_freq = (-f->Nc/2 + c)*f->fsep;
-	f->freq[c].real = cosf(2.0*PI*carrier_freq/FS);
- 	f->freq[c].imag = sinf(2.0*PI*carrier_freq/FS);
+	f->freq[c].real = COSF(2.0*PI*carrier_freq/FS);
+ 	f->freq[c].imag = SINF(2.0*PI*carrier_freq/FS);
  	f->freq_pol[c]  = 2.0*PI*carrier_freq/FS;
     }
 
     for(c=f->Nc/2; c<f->Nc; c++) {
 	carrier_freq = (-f->Nc/2 + c + 1)*f->fsep;
-	f->freq[c].real = cosf(2.0*PI*carrier_freq/FS);
- 	f->freq[c].imag = sinf(2.0*PI*carrier_freq/FS);
+	f->freq[c].real = COSF(2.0*PI*carrier_freq/FS);
+ 	f->freq[c].imag = SINF(2.0*PI*carrier_freq/FS);
  	f->freq_pol[c]  = 2.0*PI*carrier_freq/FS;
     }
 }
@@ -676,6 +686,12 @@
 	if (f >= 4)
 	    memcpy(&pilot_lut[M*(f-4)], pilot, M*sizeof(COMP));
     }
+   
+    // create complex conjugate since we need this and only this later on 
+    for (f=0;f<4*M;f++)
+    {
+        pilot_lut[f] = cconj(pilot_lut[f]);
+    }
 
 }
 
@@ -806,10 +822,18 @@
 	f->pilot_baseband2[i] = f->pilot_baseband2[i+nin];
     }
 
+#ifndef ARM_MATH_CM4
     for(i=0,j=NPILOTBASEBAND-nin; i<nin; i++,j++) {
-       	f->pilot_baseband1[j] = cmult(rx_fdm[i], cconj(pilot[i]));
-	f->pilot_baseband2[j] = cmult(rx_fdm[i], cconj(prev_pilot[i]));
+       	f->pilot_baseband1[j] = cmult(rx_fdm[i], pilot[i]);
+	f->pilot_baseband2[j] = cmult(rx_fdm[i], prev_pilot[i]);
     }
+#else
+    // TODO: Maybe a handwritten mult taking advantage of rx_fdm[0] being 
+    // used twice would be faster but this is for sure faster than 
+    // the implementation above in any case.
+    arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&pilot[0].real,&f->pilot_baseband1[NPILOTBASEBAND-nin].real,nin);
+    arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&prev_pilot[0].real,&f->pilot_baseband2[NPILOTBASEBAND-nin].real,nin);
+#endif
 
     lpf_peak_pick(&foff1, &max1, f->pilot_baseband1, f->pilot_lpf1, f->fft_pilot_cfg, f->S1, nin, do_fft);
     lpf_peak_pick(&foff2, &max2, f->pilot_baseband2, f->pilot_lpf2, f->fft_pilot_cfg, f->S2, nin, do_fft);
@@ -840,8 +864,8 @@
     float mag;
     int   i;
 
-    foff_rect.real = cosf(2.0*PI*foff/FS);
-    foff_rect.imag = sinf(2.0*PI*foff/FS);
+    foff_rect.real = COSF(2.0*PI*foff/FS);
+    foff_rect.imag = SINF(2.0*PI*foff/FS);
     for(i=0; i<nin; i++) {
 	*foff_phase_rect = cmult(*foff_phase_rect, foff_rect);
 	rx_fdm_fcorr[i] = cmult(rx_fdm[i], *foff_phase_rect);
@@ -992,7 +1016,41 @@
     return dec_rate*acc;
 }
 
+/*---------------------------------------------------------------------------*\
 
+  FUNCTION....: fir_filter2()
+  AUTHOR......: David Rowe
+  DATE CREATED: July 2014
+
+  Helper fir filter function which runs 2x same filter on two sets of data.
+\*---------------------------------------------------------------------------*/
+
+static void fir_filter2(float acc[2], float mem[], float coeff[], const unsigned int dec_rate) {
+    acc[0] = 0.0;
+    acc[1] = 0.0;
+
+    float c,m1,m2,a1,a2;
+    float* inpCmplx = &mem[0];
+    float* coeffPtr = &coeff[0];
+
+    int m;
+
+    for(m=0; m<NFILTER; m+=dec_rate) {
+        c = *coeffPtr;
+        m1 = inpCmplx[0];
+        m2 = inpCmplx[1];
+        a1 = c * m1;
+        a2 = c * m2;
+        acc[0] += a1;
+        inpCmplx+= dec_rate*2;
+        acc[1] += a2;
+        coeffPtr+= dec_rate;
+    }
+
+    acc[0] *= dec_rate;
+    acc[1] *= dec_rate;
+}
+
 /*---------------------------------------------------------------------------*\
 
   FUNCTION....: down_convert_and_rx_filter()
@@ -1049,8 +1107,8 @@
 
         //PROFILE_SAMPLE(windback_start);
         windback_phase           = -freq_pol[c]*NFILTER;
-        windback_phase_rect.real = cosf(windback_phase);
-        windback_phase_rect.imag = sinf(windback_phase);
+        windback_phase_rect.real = COSF(windback_phase);
+        windback_phase_rect.imag = SINF(windback_phase);
         phase_rx[c]              = cmult(phase_rx[c],windback_phase_rect);
         //PROFILE_SAMPLE_AND_LOG(downconvert_start, windback_start, "        windback");
 
@@ -1082,8 +1140,9 @@
            for(m=0; m<NFILTER; m++)
                rx_filt[c][k] = cadd(rx_filt[c][k], fcmult(gt_alpha5_root[m], rx_baseband[st+i+m]));
            #else
-           rx_filt[c][k].real = fir_filter(&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);
-           rx_filt[c][k].imag = fir_filter(&rx_baseband[st+i].imag, (float*)gt_alpha5_root, dec_rate);
+            // rx_filt[c][k].real = fir_filter(&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);
+            // rx_filt[c][k].imag = fir_filter(&rx_baseband[st+i].imag, (float*)gt_alpha5_root, dec_rate);
+	    fir_filter2(&rx_filt[c][k].real,&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);
            #endif
         }
         //PROFILE_SAMPLE_AND_LOG2(filter_start, "        filter");
@@ -1155,8 +1214,8 @@
        out single DFT at frequency 2*pi/P */
 
     x.real = 0.0; x.imag = 0.0;
-    freq.real = cosf(2*PI/P);
-    freq.imag = sinf(2*PI/P);
+    freq.real = COSF(2*PI/P);
+    freq.imag = SINF(2*PI/P);
     phase.real = 1.0;
     phase.imag = 0.0;
 
@@ -1213,13 +1272,10 @@
                    COMP rx_symbols[], int old_qpsk_mapping)
 {
     int   c;
-    COMP  pi_on_4;
     COMP  d;
     int   msb=0, lsb=0;
     float ferr, norm;
 
-    pi_on_4.real = cosf(PI/4.0);
-    pi_on_4.imag = sinf(PI/4.0);
 
     /* Extra 45 degree clockwise lets us use real and imag axis as
        decision boundaries. "norm" makes sure the phase subtraction
@@ -1296,11 +1352,8 @@
     float s[NC+1];
     COMP  refl_symbols[NC+1];
     float n[NC+1];
-    COMP  pi_on_4;
     int   c;
 
-    pi_on_4.real = cosf(PI/4.0);
-    pi_on_4.imag = sinf(PI/4.0);
 
     /* mag of each symbol is distance from origin, this gives us a
        vector of mags, one for each carrier. */
first commit 2022-11-08 16:13:55 +01:00			`Index: codec2-dev/src/fdmdv.c`
			`===================================================================`
			`--- codec2-dev/src/fdmdv.c (revision 2846)`
			`+++ codec2-dev/src/fdmdv.c (working copy)`
			`@@ -50,11 +50,21 @@`
			`#include "machdep.h"`

			`static int sync_uw[] = {1,-1,1,-1,1,-1};`
			`-`
			`#ifdef __EMBEDDED__`
			`#define printf gdb_stdio_printf`
			`#endif`

			`+#ifndef ARM_MATH_CM4`
			`+ #define SINF(a) sinf(a)`
			`+ #define COSF(a) cosf(a)`
			`+#else`
			`+ #define SINF(a) arm_sin_f32(a)`
			`+ #define COSF(a) arm_cos_f32(a)`
			`+#endif`
			`+`
			`+static const COMP pi_on_4 = { .70710678118654752439, .70710678118654752439 }; // COSF(PI/4) , SINF(PI/4)`
			`+`
			`+`
			`/-------------------------------------------------------------------------- \`

			`FUNCTION....: fdmdv_create`
			`@@ -110,8 +120,8 @@`
			`This helped PAPR for a few dB. We don't need to adjust rx`
			`phase as DQPSK takes care of that. */`

			`- f->phase_tx[c].real = cosf(2.0PIc/(Nc+1));`
			`- f->phase_tx[c].imag = sinf(2.0PIc/(Nc+1));`
			`+ f->phase_tx[c].real = COSF(2.0PIc/(Nc+1));`
			`+ f->phase_tx[c].imag = SINF(2.0PIc/(Nc+1));`

			`f->phase_rx[c].real = 1.0;`
			`f->phase_rx[c].imag = 0.0;`
			`@@ -124,12 +134,12 @@`
			`f->prev_tx_symbols[Nc].real = 2.0;`

			`fdmdv_set_fsep(f, FSEP);`
			`- f->freq[Nc].real = cosf(2.0PI0.0/FS);`
			`- f->freq[Nc].imag = sinf(2.0PI0.0/FS);`
			`+ f->freq[Nc].real = COSF(2.0PI0.0/FS);`
			`+ f->freq[Nc].imag = SINF(2.0PI0.0/FS);`
			`f->freq_pol[Nc] = 2.0PI0.0/FS;`

			`- f->fbb_rect.real = cosf(2.0PIFDMDV_FCENTRE/FS);`
			`- f->fbb_rect.imag = sinf(2.0PIFDMDV_FCENTRE/FS);`
			`+ f->fbb_rect.real = COSF(2.0PIFDMDV_FCENTRE/FS);`
			`+ f->fbb_rect.imag = SINF(2.0PIFDMDV_FCENTRE/FS);`
			`f->fbb_pol = 2.0PIFDMDV_FCENTRE/FS;`
			`f->fbb_phase_tx.real = 1.0;`
			`f->fbb_phase_tx.imag = 0.0;`
			`@@ -257,15 +267,15 @@`

			`for(c=0; c<f->Nc/2; c++) {`
			`carrier_freq = (-f->Nc/2 + c)*f->fsep;`
			`- f->freq[c].real = cosf(2.0PIcarrier_freq/FS);`
			`- f->freq[c].imag = sinf(2.0PIcarrier_freq/FS);`
			`+ f->freq[c].real = COSF(2.0PIcarrier_freq/FS);`
			`+ f->freq[c].imag = SINF(2.0PIcarrier_freq/FS);`
			`f->freq_pol[c] = 2.0PIcarrier_freq/FS;`
			`}`

			`for(c=f->Nc/2; c<f->Nc; c++) {`
			`carrier_freq = (-f->Nc/2 + c + 1)*f->fsep;`
			`- f->freq[c].real = cosf(2.0PIcarrier_freq/FS);`
			`- f->freq[c].imag = sinf(2.0PIcarrier_freq/FS);`
			`+ f->freq[c].real = COSF(2.0PIcarrier_freq/FS);`
			`+ f->freq[c].imag = SINF(2.0PIcarrier_freq/FS);`
			`f->freq_pol[c] = 2.0PIcarrier_freq/FS;`
			`}`
			`}`
			`@@ -676,6 +686,12 @@`
			`if (f >= 4)`
			`memcpy(&pilot_lut[M(f-4)], pilot, Msizeof(COMP));`
			`}`
			`+`
			`+ // create complex conjugate since we need this and only this later on`
			`+ for (f=0;f<4*M;f++)`
			`+ {`
			`+ pilot_lut[f] = cconj(pilot_lut[f]);`
			`+ }`

			`}`

			`@@ -806,10 +822,18 @@`
			`f->pilot_baseband2[i] = f->pilot_baseband2[i+nin];`
			`}`

			`+#ifndef ARM_MATH_CM4`
			`for(i=0,j=NPILOTBASEBAND-nin; i<nin; i++,j++) {`
			`- f->pilot_baseband1[j] = cmult(rx_fdm[i], cconj(pilot[i]));`
			`- f->pilot_baseband2[j] = cmult(rx_fdm[i], cconj(prev_pilot[i]));`
			`+ f->pilot_baseband1[j] = cmult(rx_fdm[i], pilot[i]);`
			`+ f->pilot_baseband2[j] = cmult(rx_fdm[i], prev_pilot[i]);`
			`}`
			`+#else`
			`+ // TODO: Maybe a handwritten mult taking advantage of rx_fdm[0] being`
			`+ // used twice would be faster but this is for sure faster than`
			`+ // the implementation above in any case.`
			`+ arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&pilot[0].real,&f->pilot_baseband1[NPILOTBASEBAND-nin].real,nin);`
			`+ arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&prev_pilot[0].real,&f->pilot_baseband2[NPILOTBASEBAND-nin].real,nin);`
			`+#endif`

			`lpf_peak_pick(&foff1, &max1, f->pilot_baseband1, f->pilot_lpf1, f->fft_pilot_cfg, f->S1, nin, do_fft);`
			`lpf_peak_pick(&foff2, &max2, f->pilot_baseband2, f->pilot_lpf2, f->fft_pilot_cfg, f->S2, nin, do_fft);`
			`@@ -840,8 +864,8 @@`
			`float mag;`
			`int i;`

			`- foff_rect.real = cosf(2.0PIfoff/FS);`
			`- foff_rect.imag = sinf(2.0PIfoff/FS);`
			`+ foff_rect.real = COSF(2.0PIfoff/FS);`
			`+ foff_rect.imag = SINF(2.0PIfoff/FS);`
			`for(i=0; i<nin; i++) {`
			`foff_phase_rect = cmult(foff_phase_rect, foff_rect);`
			`rx_fdm_fcorr[i] = cmult(rx_fdm[i], *foff_phase_rect);`
			`@@ -992,7 +1016,41 @@`
			`return dec_rate*acc;`
			`}`

			`+/---------------------------------------------------------------------------\`

			`+ FUNCTION....: fir_filter2()`
			`+ AUTHOR......: David Rowe`
			`+ DATE CREATED: July 2014`
			`+`
			`+ Helper fir filter function which runs 2x same filter on two sets of data.`
			`+\---------------------------------------------------------------------------/`
			`+`
			`+static void fir_filter2(float acc[2], float mem[], float coeff[], const unsigned int dec_rate) {`
			`+ acc[0] = 0.0;`
			`+ acc[1] = 0.0;`
			`+`
			`+ float c,m1,m2,a1,a2;`
			`+ float* inpCmplx = &mem[0];`
			`+ float* coeffPtr = &coeff[0];`
			`+`
			`+ int m;`
			`+`
			`+ for(m=0; m<NFILTER; m+=dec_rate) {`
			`+ c = *coeffPtr;`
			`+ m1 = inpCmplx[0];`
			`+ m2 = inpCmplx[1];`
			`+ a1 = c * m1;`
			`+ a2 = c * m2;`
			`+ acc[0] += a1;`
			`+ inpCmplx+= dec_rate*2;`
			`+ acc[1] += a2;`
			`+ coeffPtr+= dec_rate;`
			`+ }`
			`+`
			`+ acc[0] *= dec_rate;`
			`+ acc[1] *= dec_rate;`
			`+}`
			`+`
			`/---------------------------------------------------------------------------\`

			`FUNCTION....: down_convert_and_rx_filter()`
			`@@ -1049,8 +1107,8 @@`

			`//PROFILE_SAMPLE(windback_start);`
			`windback_phase = -freq_pol[c]*NFILTER;`
			`- windback_phase_rect.real = cosf(windback_phase);`
			`- windback_phase_rect.imag = sinf(windback_phase);`
			`+ windback_phase_rect.real = COSF(windback_phase);`
			`+ windback_phase_rect.imag = SINF(windback_phase);`
			`phase_rx[c] = cmult(phase_rx[c],windback_phase_rect);`
			`//PROFILE_SAMPLE_AND_LOG(downconvert_start, windback_start, " windback");`

			`@@ -1082,8 +1140,9 @@`
			`for(m=0; m<NFILTER; m++)`
			`rx_filt[c][k] = cadd(rx_filt[c][k], fcmult(gt_alpha5_root[m], rx_baseband[st+i+m]));`
			`#else`
			`- rx_filt[c][k].real = fir_filter(&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);`
			`- rx_filt[c][k].imag = fir_filter(&rx_baseband[st+i].imag, (float*)gt_alpha5_root, dec_rate);`
			`+ // rx_filt[c][k].real = fir_filter(&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);`
			`+ // rx_filt[c][k].imag = fir_filter(&rx_baseband[st+i].imag, (float*)gt_alpha5_root, dec_rate);`
			`+ fir_filter2(&rx_filt[c][k].real,&rx_baseband[st+i].real, (float*)gt_alpha5_root, dec_rate);`
			`#endif`
			`}`
			`//PROFILE_SAMPLE_AND_LOG2(filter_start, " filter");`
			`@@ -1155,8 +1214,8 @@`
			`out single DFT at frequency 2pi/P /`

			`x.real = 0.0; x.imag = 0.0;`
			`- freq.real = cosf(2*PI/P);`
			`- freq.imag = sinf(2*PI/P);`
			`+ freq.real = COSF(2*PI/P);`
			`+ freq.imag = SINF(2*PI/P);`
			`phase.real = 1.0;`
			`phase.imag = 0.0;`

			`@@ -1213,13 +1272,10 @@`
			`COMP rx_symbols[], int old_qpsk_mapping)`
			`{`
			`int c;`
			`- COMP pi_on_4;`
			`COMP d;`
			`int msb=0, lsb=0;`
			`float ferr, norm;`

			`- pi_on_4.real = cosf(PI/4.0);`
			`- pi_on_4.imag = sinf(PI/4.0);`

			`/* Extra 45 degree clockwise lets us use real and imag axis as`
			`decision boundaries. "norm" makes sure the phase subtraction`
			`@@ -1296,11 +1352,8 @@`
			`float s[NC+1];`
			`COMP refl_symbols[NC+1];`
			`float n[NC+1];`
			`- COMP pi_on_4;`
			`int c;`

			`- pi_on_4.real = cosf(PI/4.0);`
			`- pi_on_4.imag = sinf(PI/4.0);`

			`/* mag of each symbol is distance from origin, this gives us a`
			`vector of mags, one for each carrier. */`