Index: codec2-dev/src/fdmdv.c =================================================================== --- codec2-dev/src/fdmdv.c (revision 2846) +++ codec2-dev/src/fdmdv.c (working copy) @@ -50,11 +50,21 @@ #include "machdep.h" static int sync_uw[] = {1,-1,1,-1,1,-1}; - #ifdef __EMBEDDED__ #define printf gdb_stdio_printf #endif +#ifndef ARM_MATH_CM4 + #define SINF(a) sinf(a) + #define COSF(a) cosf(a) +#else + #define SINF(a) arm_sin_f32(a) + #define COSF(a) arm_cos_f32(a) +#endif + +static const COMP pi_on_4 = { .70710678118654752439, .70710678118654752439 }; // COSF(PI/4) , SINF(PI/4) + + /*--------------------------------------------------------------------------* \ FUNCTION....: fdmdv_create @@ -110,8 +120,8 @@ This helped PAPR for a few dB. We don't need to adjust rx phase as DQPSK takes care of that. */ - f->phase_tx[c].real = cosf(2.0*PI*c/(Nc+1)); - f->phase_tx[c].imag = sinf(2.0*PI*c/(Nc+1)); + f->phase_tx[c].real = COSF(2.0*PI*c/(Nc+1)); + f->phase_tx[c].imag = SINF(2.0*PI*c/(Nc+1)); f->phase_rx[c].real = 1.0; f->phase_rx[c].imag = 0.0; @@ -124,12 +134,12 @@ f->prev_tx_symbols[Nc].real = 2.0; fdmdv_set_fsep(f, FSEP); - f->freq[Nc].real = cosf(2.0*PI*0.0/FS); - f->freq[Nc].imag = sinf(2.0*PI*0.0/FS); + f->freq[Nc].real = COSF(2.0*PI*0.0/FS); + f->freq[Nc].imag = SINF(2.0*PI*0.0/FS); f->freq_pol[Nc] = 2.0*PI*0.0/FS; - f->fbb_rect.real = cosf(2.0*PI*FDMDV_FCENTRE/FS); - f->fbb_rect.imag = sinf(2.0*PI*FDMDV_FCENTRE/FS); + f->fbb_rect.real = COSF(2.0*PI*FDMDV_FCENTRE/FS); + f->fbb_rect.imag = SINF(2.0*PI*FDMDV_FCENTRE/FS); f->fbb_pol = 2.0*PI*FDMDV_FCENTRE/FS; f->fbb_phase_tx.real = 1.0; f->fbb_phase_tx.imag = 0.0; @@ -257,15 +267,15 @@ for(c=0; cNc/2; c++) { carrier_freq = (-f->Nc/2 + c)*f->fsep; - f->freq[c].real = cosf(2.0*PI*carrier_freq/FS); - f->freq[c].imag = sinf(2.0*PI*carrier_freq/FS); + f->freq[c].real = COSF(2.0*PI*carrier_freq/FS); + f->freq[c].imag = SINF(2.0*PI*carrier_freq/FS); f->freq_pol[c] = 2.0*PI*carrier_freq/FS; } for(c=f->Nc/2; cNc; c++) { carrier_freq = (-f->Nc/2 + c + 1)*f->fsep; - f->freq[c].real = cosf(2.0*PI*carrier_freq/FS); - f->freq[c].imag = sinf(2.0*PI*carrier_freq/FS); + f->freq[c].real = COSF(2.0*PI*carrier_freq/FS); + f->freq[c].imag = SINF(2.0*PI*carrier_freq/FS); f->freq_pol[c] = 2.0*PI*carrier_freq/FS; } } @@ -676,6 +686,12 @@ if (f >= 4) memcpy(&pilot_lut[M*(f-4)], pilot, M*sizeof(COMP)); } + + // create complex conjugate since we need this and only this later on + for (f=0;f<4*M;f++) + { + pilot_lut[f] = cconj(pilot_lut[f]); + } } @@ -806,10 +822,18 @@ f->pilot_baseband2[i] = f->pilot_baseband2[i+nin]; } +#ifndef ARM_MATH_CM4 for(i=0,j=NPILOTBASEBAND-nin; ipilot_baseband1[j] = cmult(rx_fdm[i], cconj(pilot[i])); - f->pilot_baseband2[j] = cmult(rx_fdm[i], cconj(prev_pilot[i])); + f->pilot_baseband1[j] = cmult(rx_fdm[i], pilot[i]); + f->pilot_baseband2[j] = cmult(rx_fdm[i], prev_pilot[i]); } +#else + // TODO: Maybe a handwritten mult taking advantage of rx_fdm[0] being + // used twice would be faster but this is for sure faster than + // the implementation above in any case. + arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&pilot[0].real,&f->pilot_baseband1[NPILOTBASEBAND-nin].real,nin); + arm_cmplx_mult_cmplx_f32(&rx_fdm[0].real,&prev_pilot[0].real,&f->pilot_baseband2[NPILOTBASEBAND-nin].real,nin); +#endif lpf_peak_pick(&foff1, &max1, f->pilot_baseband1, f->pilot_lpf1, f->fft_pilot_cfg, f->S1, nin, do_fft); lpf_peak_pick(&foff2, &max2, f->pilot_baseband2, f->pilot_lpf2, f->fft_pilot_cfg, f->S2, nin, do_fft); @@ -840,8 +864,8 @@ float mag; int i; - foff_rect.real = cosf(2.0*PI*foff/FS); - foff_rect.imag = sinf(2.0*PI*foff/FS); + foff_rect.real = COSF(2.0*PI*foff/FS); + foff_rect.imag = SINF(2.0*PI*foff/FS); for(i=0; i