UHSDR/UHSDR-active-devel/mchf-eclipse/drivers/freedv/codec2.c
2022-08-24 08:39:13 +02:00

2687 lines
84 KiB
C
Executable File

/*---------------------------------------------------------------------------*\
FILE........: codec2.c
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Codec2 fully quantised encoder and decoder functions. If you want use
codec2, the codec2_xxx functions are for you.
\*---------------------------------------------------------------------------*/
/*
Copyright (C) 2010 David Rowe
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "defines.h"
#include "codec2_fft.h"
#include "sine.h"
#include "nlp.h"
#include "dump.h"
#include "lpc.h"
#include "quantise.h"
#include "phase.h"
#include "interp.h"
#include "postfilter.h"
#include "codec2.h"
#include "lsp.h"
#include "newamp2.h"
#include "codec2_internal.h"
#include "machdep.h"
#include "bpf.h"
#include "bpfb.h"
#include "c2wideband.h"
#include "debug_alloc.h"
/*---------------------------------------------------------------------------*\
FUNCTION HEADERS
Forward declarations for the frame-level helpers and the per-mode
encoder/decoder entry points. codec2_create() stores one encoder and one
decoder from this list in the function pointers of struct CODEC2, and the
public codec2_encode()/codec2_decode() wrappers dispatch through them.
\*---------------------------------------------------------------------------*/
/* 10ms analysis/synthesis steps called by the per-mode encoders/decoders */
void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]);
void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
COMP Aw[], float gain);
/* per-mode encoders and decoders; all share one signature except
   codec2_decode_1300, which additionally takes a bit error rate estimate */
void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est);
void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits);
/* 450PWB has a decoder only; codec2_create() pairs it with codec2_encode_450 */
void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits);
/* file-local helper operating in place on n float samples */
static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
FUNCTIONS
\*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_create
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Create and initialise an instance of the codec. Returns a pointer
to the codec states or NULL on failure. One set of states is
sufficient for a full duuplex codec (i.e. an encoder and decoder).
You don't need separate states for encoders and decoders. See
c2enc.c and c2dec.c for examples.
\*---------------------------------------------------------------------------*/
//Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior !
struct CODEC2 * codec2_create(int mode)
{
struct CODEC2 *c2;
int i,l;
if (!((mode >= 0) && (mode <= CODEC2_MODE_WB))) {
return NULL;
}
#ifndef CORTEX_M4
if (( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode)) ||
( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode)) ||
( CODEC2_MODE_ACTIVE(CODEC2_MODE_WB, mode)) ) {
return NULL;
}
#endif
c2 = (struct CODEC2*)MALLOC(sizeof(struct CODEC2));
if (c2 == NULL)
return NULL;
c2->mode = mode;
/* store constants in a few places for convenience */
if( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0){
c2->c2const = c2const_create(8000, N_S);
}else{
c2->c2const = c2const_create(16000, N_S);
}
c2->Fs = c2->c2const.Fs;
int n_samp = c2->n_samp = c2->c2const.n_samp;
int m_pitch = c2->m_pitch = c2->c2const.m_pitch;
c2->Pn = (float*)MALLOC(2*n_samp*sizeof(float));
if (c2->Pn == NULL) {
return NULL;
}
c2->Sn_ = (float*)MALLOC(2*n_samp*sizeof(float));
if (c2->Sn_ == NULL) {
FREE(c2->Pn);
return NULL;
}
c2->w = (float*)MALLOC(m_pitch*sizeof(float));
if (c2->w == NULL) {
FREE(c2->Pn);
FREE(c2->Sn_);
return NULL;
}
c2->Sn = (float*)MALLOC(m_pitch*sizeof(float));
if (c2->Sn == NULL) {
FREE(c2->Pn);
FREE(c2->Sn_);
FREE(c2->w);
return NULL;
}
for(i=0; i<m_pitch; i++)
c2->Sn[i] = 1.0;
c2->hpf_states[0] = c2->hpf_states[1] = 0.0;
for(i=0; i<2*n_samp; i++)
c2->Sn_[i] = 0;
c2->fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL);
c2->fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL);
make_analysis_window(&c2->c2const, c2->fft_fwd_cfg, c2->w,c2->W);
make_synthesis_window(&c2->c2const, c2->Pn);
c2->fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL);
quantise_init();
c2->prev_f0_enc = 1/P_MAX_S;
c2->bg_est = 0.0;
c2->ex_phase = 0.0;
for(l=1; l<=MAX_AMP; l++)
c2->prev_model_dec.A[l] = 0.0;
c2->prev_model_dec.Wo = TWO_PI/c2->c2const.p_max;
c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo;
c2->prev_model_dec.voiced = 0;
for(i=0; i<LPC_ORD; i++) {
c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
}
c2->prev_e_dec = 1;
c2->nlp = nlp_create(&c2->c2const);
if (c2->nlp == NULL) {
return NULL;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, mode))
c2->gray = 0; // natural binary better for trellis decoding (hopefully added later)
else
c2->gray = 1;
c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA;
c2->xq_enc[0] = c2->xq_enc[1] = 0.0;
c2->xq_dec[0] = c2->xq_dec[1] = 0.0;
c2->smoothing = 0;
c2->bpf_buf = (float*)MALLOC(sizeof(float)*(BPF_N+4*c2->n_samp));
assert(c2->bpf_buf != NULL);
for(i=0; i<BPF_N+4*c2->n_samp; i++)
c2->bpf_buf[i] = 0.0;
c2->softdec = NULL;
/* newamp1 initialisation */
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
mel_sample_freqs_kHz(c2->rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) );
int k;
for(k=0; k<NEWAMP1_K; k++) {
c2->prev_rate_K_vec_[k] = 0.0;
}
c2->Wo_left = 0.0;
c2->voicing_left = 0;;
c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL);
c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL);
}
#ifndef CORTEX_M4
/* newamp2 initialisation */
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K);
int k;
for(k=0; k<NEWAMP2_K; k++) {
c2->n2_prev_rate_K_vec_[k] = 0.0;
}
c2->Wo_left = 0.0;
c2->voicing_left = 0;;
c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
}
/* newamp2 PWB initialisation */
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K);
int k;
for(k=0; k<NEWAMP2_16K_K; k++) {
c2->n2_pwb_prev_rate_K_vec_[k] = 0.0;
}
c2->Wo_left = 0.0;
c2->voicing_left = 0;;
c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
}
#endif
c2->flspEWov = NULL;
// make sure that one of the two decode function pointers is empty
// for the encode function pointer this is not required since we always set it
// to a meaningful value
c2->decode = NULL;
c2->decode_ber = NULL;
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode))
{
c2->encode = codec2_encode_3200;
c2->decode = codec2_decode_3200;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode))
{
c2->encode = codec2_encode_2400;
c2->decode = codec2_decode_2400;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode))
{
c2->encode = codec2_encode_1600;
c2->decode = codec2_decode_1600;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode))
{
c2->encode = codec2_encode_1400;
c2->decode = codec2_decode_1400;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode))
{
c2->encode = codec2_encode_1300;
c2->decode_ber = codec2_decode_1300;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode))
{
c2->encode = codec2_encode_1200;
c2->decode = codec2_decode_1200;
}
#ifndef CORTEX_M4
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode))
{
c2->encode = codec2_encode_700;
c2->decode = codec2_decode_700;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode))
{
c2->encode = codec2_encode_700b;
c2->decode = codec2_decode_700b;
}
#endif
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))
{
c2->encode = codec2_encode_700c;
c2->decode = codec2_decode_700c;
}
#ifndef CORTEX_M4
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode))
{
c2->encode = codec2_encode_450;
c2->decode = codec2_decode_450;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
{
//Encode PWB doesnt make sense
c2->encode = codec2_encode_450;
c2->decode = codec2_decode_450pwb;
}
if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_WB, c2->mode))
{
//Encode PWB doesnt make sense
c2->encode = codec2_encode_wb;
c2->decode = codec2_decode_wb;
}
#endif
return c2;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_destroy
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Destroy an instance of the codec.
\*---------------------------------------------------------------------------*/
/* Release the forward/inverse phase FFT configurations held by c2. */
static void codec2_release_phase_ffts(struct CODEC2 *c2)
{
    codec2_fft_free(c2->phase_fft_fwd_cfg);
    codec2_fft_free(c2->phase_fft_inv_cfg);
}

/* Free every resource owned by the codec instance, then the instance
   itself. c2 must not be NULL and must not be used afterwards. */
void codec2_destroy(struct CODEC2 *c2)
{
    assert(c2 != NULL);

    /* band-pass filter buffer and pitch estimator state */
    FREE(c2->bpf_buf);
    nlp_destroy(c2->nlp);

    /* FFT configurations that codec2_create() sets up for every mode */
    codec2_fft_free(c2->fft_fwd_cfg);
    codec2_fftr_free(c2->fftr_fwd_cfg);
    codec2_fftr_free(c2->fftr_inv_cfg);

    /* the phase FFTs only exist for the newamp1/newamp2 based modes */
    if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
        codec2_release_phase_ffts(c2);
    }
    if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
        codec2_release_phase_ffts(c2);
    }
    if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
        codec2_release_phase_ffts(c2);
    }

    /* analysis/synthesis working buffers, then the state block */
    FREE(c2->Pn);
    FREE(c2->Sn);
    FREE(c2->w);
    FREE(c2->Sn_);
    FREE(c2);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_bits_per_frame
AUTHOR......: David Rowe
DATE CREATED: Nov 14 2011
Returns the number of bits per frame.
\*---------------------------------------------------------------------------*/
int codec2_bits_per_frame(struct CODEC2 *c2) {
    int nbits;

    /* modes are tested in the same order as before; neighbouring modes
       with the same frame size share one branch */
    if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) {
        nbits = 64;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
        nbits = 48;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) {
        nbits = 64;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) {
        nbits = 56;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) {
        nbits = 52;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) {
        nbits = 48;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
        nbits = 28;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
        nbits = 18;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_WB, c2->mode)) {
        //TODO: verify this
        nbits = 64;
    } else {
        nbits = 0; /* shouldn't get here */
    }
    return nbits;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_samples_per_frame
AUTHOR......: David Rowe
DATE CREATED: Nov 14 2011
Returns the number of speech samples per frame.
\*---------------------------------------------------------------------------*/
int codec2_samples_per_frame(struct CODEC2 *c2) {
    int nsamples;

    /* same test order as before; runs of modes with equal frame length
       are folded into one branch */
    if (CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode) ||
        CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
        nsamples = 160;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) ||
               CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
        nsamples = 320;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
        nsamples = 640;
    } else if (CODEC2_MODE_ACTIVE(CODEC2_MODE_WB, c2->mode)) {
        nsamples = 160;
    } else {
        nsamples = 0; /* shouldnt get here */
    }
    return nsamples;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode
Encode one frame of speech into bits using the encoder that
codec2_create() selected for this instance's mode. Frame sizes are given
by codec2_samples_per_frame() and codec2_bits_per_frame().
\*---------------------------------------------------------------------------*/
void codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[])
{
    assert(c2 != NULL);
    assert(c2->encode != NULL);

    /* dispatch through the per-mode function pointer */
    (*c2->encode)(c2, bits, speech);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode
Decode one frame of bits into speech. Convenience wrapper around
codec2_decode_ber() that passes a bit error rate estimate of zero.
\*---------------------------------------------------------------------------*/
void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits)
{
    const float no_ber_estimate = 0.0;

    codec2_decode_ber(c2, speech, bits, no_ber_estimate);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_ber
Decode one frame of bits into speech, passing ber_est on to decoders
that accept it. If the mode has a plain decoder it is used and ber_est
is ignored; otherwise the BER-aware decoder is called.
\*---------------------------------------------------------------------------*/
void codec2_decode_ber(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est)
{
    assert(c2 != NULL);
    assert(c2->decode != NULL || c2->decode_ber != NULL);

    /* plain decoder takes precedence when both pointers happen to be set */
    if (c2->decode != NULL) {
        c2->decode(c2, speech, bits);
        return;
    }

    c2->decode_ber(c2, speech, bits, ber_est);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_3200
AUTHOR......: David Rowe
DATE CREATED: 13 Sep 2012
Encodes 160 speech samples (20ms of speech) into 64 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm twice. On the
first frame we just send the voicing bits. On the second frame we
send all model parameters. Compared to 2400 we use a larger number
of bits for the LSPs and non-VQ pitch and energy.
The bit allocation is:
Parameter bits/frame
--------------------------------------
Harmonic magnitudes (LSPs) 50
Pitch (Wo) 7
Energy 5
Voicing (10ms update) 2
TOTAL 64
\*---------------------------------------------------------------------------*/
void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL frame;                 /* model parameters of the current 10ms frame */
    float lpc[LPC_ORD+1];
    float lsp[LPC_ORD];
    int lspd_idx[LPC_ORD];
    float energy;
    int wo_idx, energy_idx;
    int k;
    unsigned int nbit = 0;       /* running bit position in the output */

    assert(c2 != NULL);

    /* start from an all-zero frame, rounded up to whole bytes */
    memset(bits, 0, ((codec2_bits_per_frame(c2) + 7) / 8));

    /* 10ms frame 1: only the voicing decision is sent */
    analyse_one_frame(c2, &frame, speech);
    pack(bits, &nbit, frame.voiced, 1);

    /* 10ms frame 2: voicing, pitch, energy and delta-coded LSPs */
    analyse_one_frame(c2, &frame, &speech[c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    wo_idx = encode_Wo(&c2->c2const, frame.Wo, WO_BITS);
    pack(bits, &nbit, wo_idx, WO_BITS);

    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    energy_idx = encode_energy(energy, E_BITS);
    pack(bits, &nbit, energy_idx, E_BITS);

    encode_lspds_scalar(lspd_idx, lsp, LPC_ORD);
    for (k = 0; k < LSPD_SCALAR_INDEXES; k++) {
        pack(bits, &nbit, lspd_idx[k], lspd_bits(k));
    }

    /* every bit of the frame must have been written */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_3200
AUTHOR......: David Rowe
DATE CREATED: 13 Sep 2012
Decodes a frame of 64 bits into 160 samples (20ms) of speech.
\*---------------------------------------------------------------------------*/
void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL frm[2];                /* one model per 10ms frame */
    int lspd_idx[LPC_ORD];
    float lsp[2][LPC_ORD];
    float lpc[2][LPC_ORD+1];
    float energy[2];
    float snr;
    int wo_idx, energy_idx;
    int f, h;
    unsigned int nbit = 0;       /* running bit position in the input */
    COMP Aw[FFT_ENC];

    assert(c2 != NULL);

    /* amplitudes are cleared only because of the (unused) snr calculation */
    for (f = 0; f < 2; f++) {
        for (h = 1; h <= MAX_AMP; h++) {
            frm[f].A[h] = 0.0;
        }
    }

    /* ---- unpack the channel bits; this partially fills the model
       parameters of the two 10ms frames ---- */
    frm[0].voiced = unpack(bits, &nbit, 1);
    frm[1].voiced = unpack(bits, &nbit, 1);

    wo_idx = unpack(bits, &nbit, WO_BITS);
    frm[1].Wo = decode_Wo(&c2->c2const, wo_idx, WO_BITS);
    frm[1].L = PI/frm[1].Wo;

    energy_idx = unpack(bits, &nbit, E_BITS);
    energy[1] = decode_energy(energy_idx, E_BITS);

    for (f = 0; f < LSPD_SCALAR_INDEXES; f++) {
        lspd_idx[f] = unpack(bits, &nbit, lspd_bits(f));
    }
    decode_lspds_scalar(lsp[1], lspd_idx, LPC_ORD);

    /* ---- interpolate: Wo, energy and LSPs arrive once per 20ms, so the
       first 10ms frame is derived from the previous and current values ---- */
    interp_Wo(&frm[0], &c2->prev_model_dec, &frm[1], c2->c2const.Wo_min);
    energy[0] = interp_energy(c2->prev_e_dec, energy[1]);
    interpolate_lsp_ver2(lsp[0], c2->prev_lsps_dec, lsp[1], 0.5, LPC_ORD);

    /* ---- recover spectral amplitudes and synthesise each frame ---- */
    for (f = 0; f < 2; f++) {
        lsp_to_lpc(lsp[f], lpc[f], LPC_ORD);
        aks_to_M2(c2->fftr_fwd_cfg, lpc[f], LPC_ORD, &frm[f], energy[f], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&frm[f]);
        synthesise_one_frame(c2, &speech[c2->n_samp*f], &frm[f], Aw, 1.0);
    }

    /* ---- remember the last frame for the next call ---- */
    c2->prev_model_dec = frm[1];
    c2->prev_e_dec = energy[1];
    for (f = 0; f < LPC_ORD; f++) {
        c2->prev_lsps_dec[f] = lsp[1][f];
    }
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_2400
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Encodes 160 speech samples (20ms of speech) into 48 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm twice. On the
first frame we just send the voicing bit. On the second frame we
send all model parameters.
The bit allocation is:
Parameter bits/frame
--------------------------------------
Harmonic magnitudes (LSPs) 36
Joint VQ of Energy and Wo 8
Voicing (10ms update) 2
Spare 2
TOTAL 48
\*---------------------------------------------------------------------------*/
void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL frame;                 /* model parameters of the current 10ms frame */
    float lpc[LPC_ORD+1];
    float lsp[LPC_ORD];
    int lsp_idx[LPC_ORD];
    float energy;
    int woe_idx;
    int k;
    int spare = 0;               /* value written into the two spare bits */
    unsigned int nbit = 0;       /* running bit position in the output */

    assert(c2 != NULL);

    /* start from an all-zero frame, rounded up to whole bytes */
    memset(bits, 0, ((codec2_bits_per_frame(c2) + 7) / 8));

    /* 10ms frame 1: only the voicing decision is sent */
    analyse_one_frame(c2, &frame, speech);
    pack(bits, &nbit, frame.voiced, 1);

    /* 10ms frame 2: voicing, joint Wo/energy index, scalar LSPs */
    analyse_one_frame(c2, &frame, &speech[c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    woe_idx = encode_WoE(&frame, energy, c2->xq_enc);
    pack(bits, &nbit, woe_idx, WO_E_BITS);

    encode_lsps_scalar(lsp_idx, lsp, LPC_ORD);
    for (k = 0; k < LSP_SCALAR_INDEXES; k++) {
        pack(bits, &nbit, lsp_idx[k], lsp_bits(k));
    }

    /* pad with the spare bits so the frame is complete */
    pack(bits, &nbit, spare, 2);

    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_2400
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
Decodes frames of 48 bits into 160 samples (20ms) of speech.
\*---------------------------------------------------------------------------*/
void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
MODEL model[2];
int lsp_indexes[LPC_ORD];
float lsps[2][LPC_ORD];
int WoE_index;
float e[2];
float snr;
float ak[2][LPC_ORD+1];
int i,j;
unsigned int nbit = 0;
COMP Aw[FFT_ENC];
assert(c2 != NULL);
/* only need to zero these out due to (unused) snr calculation */
for(i=0; i<2; i++)
for(j=1; j<=MAX_AMP; j++)
model[i].A[j] = 0.0;
/* unpack bits from channel ------------------------------------*/
/* this will partially fill the model params for the 2 x 10ms
frames */
model[0].voiced = unpack(bits, &nbit, 1);
model[1].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
for(i=0; i<LSP_SCALAR_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
}
decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD);
check_lsp_order(&lsps[1][0], LPC_ORD);
bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0);
/* interpolate ------------------------------------------------*/
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
/* LSPs are sampled every 20ms so we interpolate the frame in
between, then recover spectral amplitudes */
interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5, LPC_ORD);
for(i=0; i<2; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
apply_lpc_correction(&model[i]);
synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
/* dump parameters for deep learning experiments */
if (c2->flspEWov != NULL) {
/* 10 LSPs - energy - Wo - voicing flag - 10 LPCs */
fwrite(&lsps[i][0], LPC_ORD, sizeof(float), c2->flspEWov);
fwrite(&e[i], 1, sizeof(float), c2->flspEWov);
fwrite(&model[i].Wo, 1, sizeof(float), c2->flspEWov);
float voiced_float = model[i].voiced;
fwrite(&voiced_float, 1, sizeof(float), c2->flspEWov);
fwrite(&ak[i][1], LPC_ORD, sizeof(float), c2->flspEWov);
}
}
/* update memories for next frame ----------------------------*/
c2->prev_model_dec = model[1];
c2->prev_e_dec = e[1];
for(i=0; i<LPC_ORD; i++)
c2->prev_lsps_dec[i] = lsps[1][i];
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_1600
AUTHOR......: David Rowe
DATE CREATED: Feb 28 2013
Encodes 320 speech samples (40ms of speech) into 64 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm 4 times:
frame 1: voicing bit
frame 2: voicing bit, Wo and E
frame 3: voicing bit
frame 4: voicing bit, Wo and E, scalar LSPs
The bit allocation is:
Parameter frame 2 frame 4 Total
-------------------------------------------------------
Harmonic magnitudes (LSPs) 0 36 36
Pitch (Wo) 7 7 14
Energy 5 5 10
Voicing (10ms update) 2 2 4
TOTAL 14 50 64
\*---------------------------------------------------------------------------*/
void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL frame;                 /* model parameters of the current 10ms frame */
    float lsp[LPC_ORD];
    float lpc[LPC_ORD+1];
    int lsp_idx[LPC_ORD];
    float energy;
    int wo_idx, energy_idx;
    int k;
    unsigned int nbit = 0;       /* running bit position in the output */

    assert(c2 != NULL);

    /* start from an all-zero frame, rounded up to whole bytes */
    memset(bits, 0, ((codec2_bits_per_frame(c2) + 7) / 8));

    /* ---- frame 1: voicing only ---- */
    analyse_one_frame(c2, &frame, speech);
    pack(bits, &nbit, frame.voiced, 1);

    /* ---- frame 2: voicing, scalar Wo and energy ---- */
    analyse_one_frame(c2, &frame, &speech[c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    wo_idx = encode_Wo(&c2->c2const, frame.Wo, WO_BITS);
    pack(bits, &nbit, wo_idx, WO_BITS);

    /* the LSPs computed here are discarded; the call is needed for the
       LPC energy it returns */
    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    energy_idx = encode_energy(energy, E_BITS);
    pack(bits, &nbit, energy_idx, E_BITS);

    /* ---- frame 3: voicing only ---- */
    analyse_one_frame(c2, &frame, &speech[2*c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    /* ---- frame 4: voicing, scalar Wo and energy, scalar LSPs ---- */
    analyse_one_frame(c2, &frame, &speech[3*c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    wo_idx = encode_Wo(&c2->c2const, frame.Wo, WO_BITS);
    pack(bits, &nbit, wo_idx, WO_BITS);

    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    energy_idx = encode_energy(energy, E_BITS);
    pack(bits, &nbit, energy_idx, E_BITS);

    encode_lsps_scalar(lsp_idx, lsp, LPC_ORD);
    for (k = 0; k < LSP_SCALAR_INDEXES; k++) {
        pack(bits, &nbit, lsp_idx[k], lsp_bits(k));
    }

    /* every bit of the frame must have been written */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_1600
AUTHOR......: David Rowe
DATE CREATED: 11 May 2012
Decodes frames of 64 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL frm[4];                /* one model per 10ms frame */
    int lsp_idx[LPC_ORD];
    float lsp[4][LPC_ORD];
    float lpc[4][LPC_ORD+1];
    float energy[4];
    float snr;
    float weight;
    int wo_idx, energy_idx;
    int f, h;
    unsigned int nbit = 0;       /* running bit position in the input */
    COMP Aw[FFT_ENC];

    assert(c2 != NULL);

    /* amplitudes are cleared only because of the (unused) snr calculation */
    for (f = 0; f < 4; f++) {
        for (h = 1; h <= MAX_AMP; h++) {
            frm[f].A[h] = 0.0;
        }
    }

    /* ---- unpack the channel bits; this partially fills the model
       parameters of the four 10ms frames ---- */

    /* first 20ms: two voicing bits, then Wo and energy of frame index 1 */
    frm[0].voiced = unpack(bits, &nbit, 1);
    frm[1].voiced = unpack(bits, &nbit, 1);

    wo_idx = unpack(bits, &nbit, WO_BITS);
    frm[1].Wo = decode_Wo(&c2->c2const, wo_idx, WO_BITS);
    frm[1].L = PI/frm[1].Wo;

    energy_idx = unpack(bits, &nbit, E_BITS);
    energy[1] = decode_energy(energy_idx, E_BITS);

    /* second 20ms: two voicing bits, Wo and energy of frame index 3, LSPs */
    frm[2].voiced = unpack(bits, &nbit, 1);
    frm[3].voiced = unpack(bits, &nbit, 1);

    wo_idx = unpack(bits, &nbit, WO_BITS);
    frm[3].Wo = decode_Wo(&c2->c2const, wo_idx, WO_BITS);
    frm[3].L = PI/frm[3].Wo;

    energy_idx = unpack(bits, &nbit, E_BITS);
    energy[3] = decode_energy(energy_idx, E_BITS);

    for (f = 0; f < LSP_SCALAR_INDEXES; f++) {
        lsp_idx[f] = unpack(bits, &nbit, lsp_bits(f));
    }
    decode_lsps_scalar(lsp[3], lsp_idx, LPC_ORD);
    check_lsp_order(lsp[3], LPC_ORD);
    bw_expand_lsps(lsp[3], LPC_ORD, 50.0, 100.0);

    /* ---- interpolate: Wo and energy arrive every 20ms, so one 10ms
       frame is filled in between each pair of 20ms samples ---- */
    interp_Wo(&frm[0], &c2->prev_model_dec, &frm[1], c2->c2const.Wo_min);
    energy[0] = interp_energy(c2->prev_e_dec, energy[1]);
    interp_Wo(&frm[2], &frm[1], &frm[3], c2->c2const.Wo_min);
    energy[2] = interp_energy(energy[1], energy[3]);

    /* LSPs arrive every 40ms: the three frames in between are interpolated
       with weights 0.25, 0.5 and 0.75 */
    weight = 0.25;
    for (f = 0; f < 3; f++) {
        interpolate_lsp_ver2(lsp[f], c2->prev_lsps_dec, lsp[3], weight, LPC_ORD);
        weight += 0.25;
    }

    /* ---- recover spectral amplitudes and synthesise each frame ---- */
    for (f = 0; f < 4; f++) {
        lsp_to_lpc(lsp[f], lpc[f], LPC_ORD);
        aks_to_M2(c2->fftr_fwd_cfg, lpc[f], LPC_ORD, &frm[f], energy[f], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&frm[f]);
        synthesise_one_frame(c2, &speech[c2->n_samp*f], &frm[f], Aw, 1.0);
    }

    /* ---- remember the last frame for the next call ---- */
    c2->prev_model_dec = frm[3];
    c2->prev_e_dec = energy[3];
    for (f = 0; f < LPC_ORD; f++) {
        c2->prev_lsps_dec[f] = lsp[3][f];
    }
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_1400
AUTHOR......: David Rowe
DATE CREATED: May 11 2012
Encodes 320 speech samples (40ms of speech) into 56 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm 4 times:
frame 1: voicing bit
frame 2: voicing bit, joint VQ of Wo and E
frame 3: voicing bit
frame 4: voicing bit, joint VQ of Wo and E, scalar LSPs
The bit allocation is:
Parameter frame 2 frame 4 Total
-------------------------------------------------------
Harmonic magnitudes (LSPs) 0 36 36
Energy+Wo 8 8 16
Voicing (10ms update) 2 2 4
TOTAL 10 46 56
\*---------------------------------------------------------------------------*/
void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL frame;                 /* model parameters of the current 10ms frame */
    float lsp[LPC_ORD];
    float lpc[LPC_ORD+1];
    int lsp_idx[LPC_ORD];
    float energy;
    int woe_idx;
    int k;
    unsigned int nbit = 0;       /* running bit position in the output */

    assert(c2 != NULL);

    /* start from an all-zero frame, rounded up to whole bytes */
    memset(bits, 0, ((codec2_bits_per_frame(c2) + 7) / 8));

    /* ---- frame 1: voicing only ---- */
    analyse_one_frame(c2, &frame, speech);
    pack(bits, &nbit, frame.voiced, 1);

    /* ---- frame 2: voicing, joint Wo and energy ---- */
    analyse_one_frame(c2, &frame, &speech[c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    /* the LSPs computed here are discarded; the call is needed for the
       LPC energy it returns */
    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    woe_idx = encode_WoE(&frame, energy, c2->xq_enc);
    pack(bits, &nbit, woe_idx, WO_E_BITS);

    /* ---- frame 3: voicing only ---- */
    analyse_one_frame(c2, &frame, &speech[2*c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    /* ---- frame 4: voicing, joint Wo and energy, scalar LSPs ---- */
    analyse_one_frame(c2, &frame, &speech[3*c2->n_samp]);
    pack(bits, &nbit, frame.voiced, 1);

    energy = speech_to_uq_lsps(lsp, lpc, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    woe_idx = encode_WoE(&frame, energy, c2->xq_enc);
    pack(bits, &nbit, woe_idx, WO_E_BITS);

    encode_lsps_scalar(lsp_idx, lsp, LPC_ORD);
    for (k = 0; k < LSP_SCALAR_INDEXES; k++) {
        pack(bits, &nbit, lsp_idx[k], lsp_bits(k));
    }

    /* every bit of the frame must have been written */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_1400
AUTHOR......: David Rowe
DATE CREATED: 11 May 2012
Decodes frames of 56 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL frm[4];                /* one model per 10ms frame */
    int lsp_idx[LPC_ORD];
    float lsp[4][LPC_ORD];
    float lpc[4][LPC_ORD+1];
    float energy[4];
    float snr;
    float weight;
    int woe_idx;
    int f, h;
    unsigned int nbit = 0;       /* running bit position in the input */
    COMP Aw[FFT_ENC];

    assert(c2 != NULL);

    /* amplitudes are cleared only because of the (unused) snr calculation */
    for (f = 0; f < 4; f++) {
        for (h = 1; h <= MAX_AMP; h++) {
            frm[f].A[h] = 0.0;
        }
    }

    /* ---- unpack the channel bits; this partially fills the model
       parameters of the four 10ms frames ---- */

    /* first 20ms: two voicing bits, joint Wo/energy of frame index 1 */
    frm[0].voiced = unpack(bits, &nbit, 1);
    frm[1].voiced = unpack(bits, &nbit, 1);
    woe_idx = unpack(bits, &nbit, WO_E_BITS);
    decode_WoE(&c2->c2const, &frm[1], &energy[1], c2->xq_dec, woe_idx);

    /* second 20ms: two voicing bits, joint Wo/energy of frame index 3, LSPs */
    frm[2].voiced = unpack(bits, &nbit, 1);
    frm[3].voiced = unpack(bits, &nbit, 1);
    woe_idx = unpack(bits, &nbit, WO_E_BITS);
    decode_WoE(&c2->c2const, &frm[3], &energy[3], c2->xq_dec, woe_idx);

    for (f = 0; f < LSP_SCALAR_INDEXES; f++) {
        lsp_idx[f] = unpack(bits, &nbit, lsp_bits(f));
    }
    decode_lsps_scalar(lsp[3], lsp_idx, LPC_ORD);
    check_lsp_order(lsp[3], LPC_ORD);
    bw_expand_lsps(lsp[3], LPC_ORD, 50.0, 100.0);

    /* ---- interpolate: Wo and energy arrive every 20ms, so one 10ms
       frame is filled in between each pair of 20ms samples ---- */
    interp_Wo(&frm[0], &c2->prev_model_dec, &frm[1], c2->c2const.Wo_min);
    energy[0] = interp_energy(c2->prev_e_dec, energy[1]);
    interp_Wo(&frm[2], &frm[1], &frm[3], c2->c2const.Wo_min);
    energy[2] = interp_energy(energy[1], energy[3]);

    /* LSPs arrive every 40ms: the three frames in between are interpolated
       with weights 0.25, 0.5 and 0.75 */
    weight = 0.25;
    for (f = 0; f < 3; f++) {
        interpolate_lsp_ver2(lsp[f], c2->prev_lsps_dec, lsp[3], weight, LPC_ORD);
        weight += 0.25;
    }

    /* ---- recover spectral amplitudes and synthesise each frame ---- */
    for (f = 0; f < 4; f++) {
        lsp_to_lpc(lsp[f], lpc[f], LPC_ORD);
        aks_to_M2(c2->fftr_fwd_cfg, lpc[f], LPC_ORD, &frm[f], energy[f], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&frm[f]);
        synthesise_one_frame(c2, &speech[c2->n_samp*f], &frm[f], Aw, 1.0);
    }

    /* ---- remember the last frame for the next call ---- */
    c2->prev_model_dec = frm[3];
    c2->prev_e_dec = energy[3];
    for (f = 0; f < LPC_ORD; f++) {
        c2->prev_lsps_dec[f] = lsp[3][f];
    }
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_1300
AUTHOR......: David Rowe
DATE CREATED: March 14 2013
Encodes 320 speech samples (40ms of speech) into 52 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm 4 times:
frame 1: voicing bit
frame 2: voicing bit
frame 3: voicing bit
frame 4: voicing bit, Wo and E, scalar LSPs
The bit allocation is:
Parameter frame 2 frame 4 Total
-------------------------------------------------------
Harmonic magnitudes (LSPs) 0 36 36
Pitch (Wo) 0 7 7
Energy 0 5 5
Voicing (10ms update) 2 2 4
TOTAL 2 50 52
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 1300 bit/s payload written to "bits".
 *
 * Packing order: one voicing bit per frame, then - all taken from the
 * fourth frame - the Wo index, the energy index and the scalar LSP indexes.
 * Every field goes through pack_natural_or_gray() with c2->gray.
 */
void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL        model;
    float        lsps[LPC_ORD];
    float        ak[LPC_ORD+1];
    float        energy;
    int          lsp_indexes[LPC_ORD];
    int          Wo_index, e_index;
    int          frame, k;
    int          nbytes;
    unsigned int nbit = 0;

    assert(c2 != NULL);

    /* clear the payload, rounded up to a whole number of bytes */
    nbytes = (codec2_bits_per_frame(c2) + 7) / 8;
    memset(bits, '\0', nbytes);

    /* frames 1..4: one voicing bit each.  When the loop finishes "model"
       holds the analysis of the fourth frame, which supplies Wo below. */
    for (frame = 0; frame < 4; frame++) {
        analyse_one_frame(c2, &model, &speech[frame*c2->n_samp]);
        pack_natural_or_gray(bits, &nbit, model.voiced, 1, c2->gray);
    }

    /* frame 4 only: scalar Wo */
    Wo_index = encode_Wo(&c2->c2const, model.Wo, WO_BITS);
    pack_natural_or_gray(bits, &nbit, Wo_index, WO_BITS, c2->gray);

    /* frame 4 only: energy, obtained together with the unquantised LSPs */
    energy  = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
    e_index = encode_energy(energy, E_BITS);
    pack_natural_or_gray(bits, &nbit, e_index, E_BITS, c2->gray);

    /* frame 4 only: scalar LSP indexes */
    encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
    k = 0;
    while (k < LSP_SCALAR_INDEXES) {
        pack_natural_or_gray(bits, &nbit, lsp_indexes[k], lsp_bits(k), c2->gray);
        k++;
    }

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_1300
AUTHOR......: David Rowe
DATE CREATED: 11 May 2012
Decodes frames of 52 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
/* Running count of decoded 10ms frames; referenced by the disabled debug
   trace inside codec2_decode_1300(). */
static int frames;

/*
 * Decode one 1300 bit/s payload ("bits") into 4 * c2->n_samp samples
 * written to "speech".
 *
 * c2      - codec state; the previous model, energy and LSPs stored in it
 *           are read for interpolation and overwritten on exit.
 * ber_est - when it exceeds 0.15 the frame is soft muted: all four voicing
 *           flags are cleared, the energy is replaced by
 *           decode_energy(10, E_BITS) and the LSPs are bandwidth expanded
 *           a second time with (200.0, 200.0).
 *
 * If c2->flspEWov is non-NULL, the LSPs, energy, Wo, voicing flag (as a
 * float) and ak[1..LPC_ORD] of each 10ms frame are appended to that stream.
 */
void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est)
{
    MODEL   model[4];
    int     lsp_indexes[LPC_ORD];
    float   lsps[4][LPC_ORD];
    int     Wo_index, e_index;
    float   e[4];
    float   snr;
    float   ak[4][LPC_ORD+1];
    int     i,j;
    unsigned int nbit = 0;
    float   weight;
    COMP    Aw[FFT_ENC];

    assert(c2 != NULL);
    frames += 4;

    /* only need to zero these out due to (unused) snr calculation */
    for(i=0; i<4; i++)
        for(j=1; j<=MAX_AMP; j++)
            model[i].A[j] = 0.0;

    /* unpack bits from channel ------------------------------------*/

    /* this will partially fill the model params for the 4 x 10ms
       frames */
    for(i=0; i<4; i++)
        model[i].voiced = unpack_natural_or_gray(bits, &nbit, 1, c2->gray);

    Wo_index = unpack_natural_or_gray(bits, &nbit, WO_BITS, c2->gray);
    model[3].Wo = decode_Wo(&c2->c2const, Wo_index, WO_BITS);
    model[3].L  = PI/model[3].Wo;

    e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
    e[3] = decode_energy(e_index, E_BITS);

    for(i=0; i<LSP_SCALAR_INDEXES; i++) {
        lsp_indexes[i] = unpack_natural_or_gray(bits, &nbit, lsp_bits(i), c2->gray);
    }
    decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
    check_lsp_order(&lsps[3][0], LPC_ORD);
    bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);

    /* soft mute when the caller reports a high bit error rate estimate */
    if (ber_est > 0.15) {
        model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0;
        e[3] = decode_energy(10, E_BITS);
        bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0);
    }

    /* interpolate ------------------------------------------------*/

    /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
       the 3 frames in between */
    for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
        interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
        interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
        e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
    }

    /* then recover spectral amplitudes */
    for(i=0; i<4; i++) {
        lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
        aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&model[i]);
        synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);

        /* dump parameters for deep learning experiments */
        if (c2->flspEWov != NULL) {
            /* 10 LSPs - energy - Wo - voicing flag - 10 LPCs.
               fwrite() takes (ptr, element size, element count, stream);
               the byte stream produced is the same as before, the
               arguments are now in their documented positions. */
            float voiced_float = model[i].voiced;

            fwrite(&lsps[i][0],   sizeof(float), LPC_ORD, c2->flspEWov);
            fwrite(&e[i],         sizeof(float), 1,       c2->flspEWov);
            fwrite(&model[i].Wo,  sizeof(float), 1,       c2->flspEWov);
            fwrite(&voiced_float, sizeof(float), 1,       c2->flspEWov);
            fwrite(&ak[i][1],     sizeof(float), LPC_ORD, c2->flspEWov);
        }
    }

    /*
    for(i=0; i<4; i++) {
        printf("%d Wo: %f L: %d v: %d\n", frames, model[i].Wo, model[i].L, model[i].voiced);
    }
    if (frames == 4*50)
        exit(0);
    */

#ifdef DUMP
    dump_lsp_(&lsps[3][0]);
    dump_ak_(&ak[3][0], LPC_ORD);
#endif

    /* update memories for next frame ----------------------------*/
    c2->prev_model_dec = model[3];
    c2->prev_e_dec = e[3];
    for(i=0; i<LPC_ORD; i++)
        c2->prev_lsps_dec[i] = lsps[3][i];
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_1200
AUTHOR......: David Rowe
DATE CREATED: Nov 14 2011
Encodes 320 speech samples (40ms of speech) into 48 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: voicing bit
frame 1: voicing bit, joint VQ of Wo and E
frame 2: voicing bit
frame 3: voicing bit, joint VQ of Wo and E, VQ LSPs
The bit allocation is:
Parameter frame 2 frame 4 Total
-------------------------------------------------------
Harmonic magnitudes (LSPs) 0 27 27
Energy+Wo 8 8 16
Voicing (10ms update) 2 2 4
Spare 0 1 1
TOTAL 10 38 48
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 1200 bit/s payload written to "bits".
 *
 * Every frame contributes a voicing bit; the second and fourth frames also
 * contribute a joint Wo/energy index.  The LSP VQ indexes of the fourth
 * frame and a single spare bit (always 0) close the payload.
 */
void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL        model;
    float        lsps[LPC_ORD];
    float        lsps_[LPC_ORD];
    float        ak[LPC_ORD+1];
    float        energy;
    int          lsp_indexes[LPC_ORD];
    int          WoE_index;
    int          frame, k;
    int          spare = 0;
    unsigned int nbit = 0;

    assert(c2 != NULL);

    /* clear the payload, rounded up to a whole number of bytes */
    memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));

    for (frame = 0; frame < 4; frame++) {
        /* voicing bit for every 10ms frame */
        analyse_one_frame(c2, &model, &speech[frame*c2->n_samp]);
        pack(bits, &nbit, model.voiced, 1);

        /* frames 2 and 4 (odd zero-based index) also carry joint Wo & E;
           speech_to_uq_lsps() is run to obtain the LPC energy */
        if (frame % 2 == 1) {
            energy    = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD);
            WoE_index = encode_WoE(&model, energy, c2->xq_enc);
            pack(bits, &nbit, WoE_index, WO_E_BITS);
        }
    }

    /* "lsps" now holds the values computed for the fourth frame */
    encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD);
    k = 0;
    while (k < LSP_PRED_VQ_INDEXES) {
        pack(bits, &nbit, lsp_indexes[k], lsp_pred_vq_bits(k));
        k++;
    }

    pack(bits, &nbit, spare, 1);

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_1200
AUTHOR......: David Rowe
DATE CREATED: 14 Feb 2012
Decodes frames of 48 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
MODEL model[4];
int lsp_indexes[LPC_ORD];
float lsps[4][LPC_ORD];
int WoE_index;
float e[4];
float snr;
float ak[4][LPC_ORD+1];
int i,j;
unsigned int nbit = 0;
float weight;
COMP Aw[FFT_ENC];
assert(c2 != NULL);
/* only need to zero these out due to (unused) snr calculation */
for(i=0; i<4; i++)
for(j=1; j<=MAX_AMP; j++)
model[i].A[j] = 0.0;
/* unpack bits from channel ------------------------------------*/
/* this will partially fill the model params for the 4 x 10ms
frames */
model[0].voiced = unpack(bits, &nbit, 1);
model[1].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
decode_WoE(&c2->c2const, &model[1], &e[1], c2->xq_dec, WoE_index);
model[2].voiced = unpack(bits, &nbit, 1);
model[3].voiced = unpack(bits, &nbit, 1);
WoE_index = unpack(bits, &nbit, WO_E_BITS);
decode_WoE(&c2->c2const, &model[3], &e[3], c2->xq_dec, WoE_index);
for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i));
}
decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD , 0);
check_lsp_order(&lsps[3][0], LPC_ORD);
bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
/* interpolate ------------------------------------------------*/
/* Wo and energy are sampled every 20ms, so we interpolate just 1
10ms frame between 20ms samples */
interp_Wo(&model[0], &c2->prev_model_dec, &model[1], c2->c2const.Wo_min);
e[0] = interp_energy(c2->prev_e_dec, e[1]);
interp_Wo(&model[2], &model[1], &model[3], c2->c2const.Wo_min);
e[2] = interp_energy(e[1], e[3]);
/* LSPs are sampled every 40ms so we interpolate the 3 frames in
between, then recover spectral amplitudes */
for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD);
}
for(i=0; i<4; i++) {
lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
apply_lpc_correction(&model[i]);
synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
}
/* update memories for next frame ----------------------------*/
c2->prev_model_dec = model[3];
c2->prev_e_dec = e[3];
for(i=0; i<LPC_ORD; i++)
c2->prev_lsps_dec[i] = lsps[3][i];
}
#ifndef CORTEX_M4
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_700
AUTHOR......: David Rowe
DATE CREATED: April 2015
Encodes 320 speech samples (40ms of speech) into 28 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: nothing
frame 1: nothing
frame 2: nothing
frame 3: voicing bit, scalar Wo and E, 17 bit LSP MEL scalar, 2 spare
The bit allocation is:
Parameter frames 1-3 frame 4 Total
-----------------------------------------------------------
Harmonic magnitudes (LSPs) 0 17 17
Energy 0 3 3
log Wo 0 5 5
Voicing 0 1 1
spare 0 2 2
TOTAL 0 28 28
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 700 bit/s payload written to "bits".
 *
 * The input is first passed through the "bpf" filter using c2->bpf_buf as
 * filter memory; analysis then runs on the filtered copy.  Only the fourth
 * frame contributes bits: voicing, 5 bit log Wo, 3 bit energy, the scalar
 * mel-LSP indexes and 2 spare bits (always 0).
 */
void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    /* Must come first: c2->n_samp is read below to size the two VLAs, so a
       NULL check placed after the declarations would come too late. */
    assert(c2 != NULL);

    MODEL        model;
    float        lsps[LPC_ORD_LOW];
    float        mel[LPC_ORD_LOW];
    float        ak[LPC_ORD_LOW+1];
    float        e, f;
    int          indexes[LPC_ORD_LOW];
    int          Wo_index, e_index, i;
    unsigned int nbit = 0;
    float        bpf_out[4*c2->n_samp];
    short        bpf_speech[4*c2->n_samp];
    int          spare = 0;

    /* clear the payload, rounded up to a whole number of bytes */
    memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));

    /* band pass filter */

    /* carry the last BPF_N samples of the previous call over to the front
       of the buffer, then append the new input behind them */
    for(i=0; i<BPF_N; i++)
        c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
    for(i=0; i<4*c2->n_samp; i++)
        c2->bpf_buf[BPF_N+i] = speech[i];

    inverse_filter(&c2->bpf_buf[BPF_N], bpf, 4*c2->n_samp, bpf_out, BPF_N-1);
    for(i=0; i<4*c2->n_samp; i++)
        bpf_speech[i] = bpf_out[i];

    /* frame 1 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, bpf_speech);

    /* frame 2 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);

    /* frame 3 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);

    /* frame 4: - voicing, scalar Wo & E, scalar LSPs -----------------*/

    analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
    pack(bits, &nbit, model.voiced, 1);
    Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
    pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);

    e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
    e_index = encode_energy(e, 3);
    pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);

    /* scale each LSP by 4000/PI, then map to the mel scale, rounded to
       the nearest integer */
    for(i=0; i<LPC_ORD_LOW; i++) {
        f = (4000.0/PI)*lsps[i];
        mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
    }
    encode_mels_scalar(indexes, mel, LPC_ORD_LOW);

    for(i=0; i<LPC_ORD_LOW; i++) {
        pack_natural_or_gray(bits, &nbit, indexes[i], mel_bits(i), c2->gray);
    }

    pack_natural_or_gray(bits, &nbit, spare, 2, c2->gray);

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_700
AUTHOR......: David Rowe
DATE CREATED: April 2015
Decodes frames of 28 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
/*
 * Decode one 700 bit/s payload ("bits") into 4 * c2->n_samp samples
 * written to "speech".
 *
 * All parameters are transmitted for the fourth frame only; the voicing
 * flag is copied to the first three frames and LSPs, Wo and energy are
 * interpolated against the decoder memories held in c2, which are replaced
 * with the fourth frame's values before returning.
 */
void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL        model[4];
    int          indexes[LPC_ORD_LOW];
    float        mel[LPC_ORD_LOW];
    float        lsps[4][LPC_ORD_LOW];
    int          Wo_index, e_index;
    float        e[4];
    float        snr, f_;
    float        ak[4][LPC_ORD_LOW+1];
    int          i, j;
    unsigned int nbit = 0;
    float        weight;
    COMP         Aw[FFT_ENC];

    assert(c2 != NULL);

    /* only need to zero these out due to (unused) snr calculation */
    for (i = 0; i < 4; i++) {
        for (j = 1; j <= MAX_AMP; j++) {
            model[i].A[j] = 0.0;
        }
    }

    /* unpack bits from channel ------------------------------------*/

    /* a single voicing bit is shared by all four frames */
    model[3].voiced = unpack(bits, &nbit, 1);
    for (i = 0; i < 3; i++) {
        model[i].voiced = model[3].voiced;
    }

    Wo_index    = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
    model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
    model[3].L  = PI/model[3].Wo;

    e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
    e[3]    = decode_energy(e_index, 3);

    i = 0;
    while (i < LPC_ORD_LOW) {
        indexes[i] = unpack_natural_or_gray(bits, &nbit, mel_bits(i), c2->gray);
        i++;
    }

    /* mel values back to LSPs: invert the mel mapping, then scale by
       PI/4000 */
    decode_mels_scalar(mel, indexes, LPC_ORD_LOW);
    for (i = 0; i < LPC_ORD_LOW; i++) {
        f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
        lsps[3][i] = f_*(PI/4000.0);
    }

    check_lsp_order(&lsps[3][0], LPC_ORD_LOW);
    bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 50.0, 100.0);

#ifdef MASK_NOT_FOR_NOW
    /* first pass at soft decn error masking, needs further work */
    /* If soft dec info available expand further for low power frames */

    if (c2->softdec) {
        float e = 0.0;
        for(i=9; i<9+17; i++)
            e += c2->softdec[i]*c2->softdec[i];
        e /= 6.0;
        //fprintf(stderr, "e: %f\n", e);
        //if (e < 0.3)
        //      bw_expand_lsps(&lsps[3][0], LPC_ORD_LOW, 150.0, 300.0);
    }
#endif

    /* interpolate ------------------------------------------------*/

    /* LSPs, Wo, and energy are sampled every 40ms so the three frames in
       between are interpolated with weights 0.25, 0.5 and 0.75 */
    for (i = 0; i < 3; i++) {
        weight = 0.25f * (float)(i + 1);
        interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
        interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
        e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
    }

    /* recover spectral amplitudes and synthesise each 10ms frame */
    for (i = 0; i < 4; i++) {
        lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
        aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&model[i]);
        synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
    }

#ifdef DUMP
    dump_lsp_(&lsps[3][0]);
    dump_ak_(&ak[3][0], LPC_ORD_LOW);
    dump_model(&model[3]);
    if (c2->softdec)
        dump_softdec(c2->softdec, nbit);
#endif

    /* update memories for next frame ----------------------------*/
    c2->prev_model_dec = model[3];
    c2->prev_e_dec     = e[3];
    for (i = 0; i < LPC_ORD_LOW; i++) {
        c2->prev_lsps_dec[i] = lsps[3][i];
    }
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_700b
AUTHOR......: David Rowe
DATE CREATED: August 2015
Version b of 700 bit/s codec. After some experiments over the air I
was unhappy with the rate 700 codec so spent a few weeks
trying to improve the speech quality. This version uses a wider BPF
and vector quantised mel-lsps.
Encodes 320 speech samples (40ms of speech) into 28 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: nothing
frame 1: nothing
frame 2: nothing
frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ,
1 spare
The bit allocation is:
Parameter frames 1-3 frame 4 Total
-----------------------------------------------------------
Harmonic magnitudes (LSPs) 0 18 18
Energy 0 3 3
log Wo 0 5 5
Voicing 0 1 1
spare 0 1 1
TOTAL 0 28 28
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 700B payload written to "bits".
 *
 * The input is first passed through the "bpfb" filter using c2->bpf_buf as
 * filter memory; analysis then runs on the filtered copy.  Only the fourth
 * frame contributes bits: voicing, 5 bit log Wo, 3 bit energy, three
 * mel-LSP VQ indexes and 1 spare bit (always 0).
 */
void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    /* Must come first: c2->n_samp is read below to size the two VLAs, so a
       NULL check placed after the declarations would come too late. */
    assert(c2 != NULL);

    MODEL        model;
    float        lsps[LPC_ORD_LOW];
    float        mel[LPC_ORD_LOW];
    float        mel_[LPC_ORD_LOW];
    float        ak[LPC_ORD_LOW+1];
    float        e, f;
    int          indexes[3];
    int          Wo_index, e_index, i;
    unsigned int nbit = 0;
    float        bpf_out[4*c2->n_samp];
    short        bpf_speech[4*c2->n_samp];
    int          spare = 0;

    /* clear the payload, rounded up to a whole number of bytes */
    memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));

    /* band pass filter */

    /* carry the last BPF_N samples of the previous call over to the front
       of the buffer, then append the new input behind them */
    for(i=0; i<BPF_N; i++)
        c2->bpf_buf[i] = c2->bpf_buf[4*c2->n_samp+i];
    for(i=0; i<4*c2->n_samp; i++)
        c2->bpf_buf[BPF_N+i] = speech[i];

    inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*c2->n_samp, bpf_out, BPF_N-1);
    for(i=0; i<4*c2->n_samp; i++)
        bpf_speech[i] = bpf_out[i];

    /* frame 1 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, bpf_speech);

    /* frame 2 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, &bpf_speech[c2->n_samp]);

    /* frame 3 --------------------------------------------------------*/

    analyse_one_frame(c2, &model, &bpf_speech[2*c2->n_samp]);

    /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/

    analyse_one_frame(c2, &model, &bpf_speech[3*c2->n_samp]);
    pack(bits, &nbit, model.voiced, 1);
    Wo_index = encode_log_Wo(&c2->c2const, model.Wo, 5);
    pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);

    e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, c2->m_pitch, LPC_ORD_LOW);
    e_index = encode_energy(e, 3);
    pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);

    /* scale each LSP by 4000/PI, then map to the mel scale, rounded to
       the nearest integer */
    for(i=0; i<LPC_ORD_LOW; i++) {
        f = (4000.0/PI)*lsps[i];
        mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5);
    }
    lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5);

    for(i=0; i<3; i++) {
        pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray);
    }

    pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray);

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_700b
AUTHOR......: David Rowe
DATE CREATED: August 2015
Decodes frames of 28 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
/*
 * Decode one 700B payload ("bits") into 4 * c2->n_samp samples written to
 * "speech".
 *
 * All parameters are transmitted for the fourth frame only; the voicing
 * flag is copied to the first three frames and LSPs, Wo and energy are
 * interpolated against the decoder memories held in c2, which are replaced
 * with the fourth frame's values before returning.
 */
void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL        model[4];
    int          indexes[3];
    float        mel[LPC_ORD_LOW];
    float        lsps[4][LPC_ORD_LOW];
    int          Wo_index, e_index;
    float        e[4];
    float        snr, f_;
    float        ak[4][LPC_ORD_LOW+1];
    int          i, j;
    unsigned int nbit = 0;
    float        weight;
    COMP         Aw[FFT_ENC];

    assert(c2 != NULL);

    /* only need to zero these out due to (unused) snr calculation */
    for (i = 0; i < 4; i++) {
        for (j = 1; j <= MAX_AMP; j++) {
            model[i].A[j] = 0.0;
        }
    }

    /* unpack bits from channel ------------------------------------*/

    /* a single voicing bit is shared by all four frames */
    model[3].voiced = unpack(bits, &nbit, 1);
    for (i = 0; i < 3; i++) {
        model[i].voiced = model[3].voiced;
    }

    Wo_index    = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
    model[3].Wo = decode_log_Wo(&c2->c2const, Wo_index, 5);
    model[3].L  = PI/model[3].Wo;

    e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
    e[3]    = decode_energy(e_index, 3);

    i = 0;
    while (i < 3) {
        indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray);
        i++;
    }

    lspmelvq_decode(indexes, mel, LPC_ORD_LOW);

    /* Push neighbouring mel values apart whenever mel[i] does not exceed
       mel[i-1] by more than MEL_ROUND.  After a fix-up the index is reset
       to 1 and then incremented, so the scan resumes at index 2. */
    #define MEL_ROUND 10
    i = 1;
    while (i < LPC_ORD_LOW) {
        if (mel[i] <= mel[i-1]+MEL_ROUND) {
            mel[i]   += MEL_ROUND/2;
            mel[i-1] -= MEL_ROUND/2;
            i = 1;
        }
        i++;
    }

    /* mel values back to LSPs: invert the mel mapping, then scale by
       PI/4000 */
    for (i = 0; i < LPC_ORD_LOW; i++) {
        f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0);
        lsps[3][i] = f_*(PI/4000.0);
    }

    /* interpolate ------------------------------------------------*/

    /* LSPs, Wo, and energy are sampled every 40ms so the three frames in
       between are interpolated with weights 0.25, 0.5 and 0.75 */
    for (i = 0; i < 3; i++) {
        weight = 0.25f * (float)(i + 1);
        interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
        interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight, c2->c2const.Wo_min);
        e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
    }

    /* recover spectral amplitudes and synthesise each 10ms frame */
    for (i = 0; i < 4; i++) {
        lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
        aks_to_M2(c2->fftr_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
                  c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw);
        apply_lpc_correction(&model[i]);
        synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], Aw, 1.0);
    }

#ifdef DUMP
    dump_lsp_(&lsps[3][0]);
    dump_ak_(&ak[3][0], LPC_ORD_LOW);
    dump_model(&model[3]);
    if (c2->softdec)
        dump_softdec(c2->softdec, nbit);
#endif

    /* update memories for next frame ----------------------------*/
    c2->prev_model_dec = model[3];
    c2->prev_e_dec     = e[3];
    for (i = 0; i < LPC_ORD_LOW; i++) {
        c2->prev_lsps_dec[i] = lsps[3][i];
    }
}
#endif
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_700c
AUTHOR......: David Rowe
DATE CREATED: Jan 2017
Version c of 700 bit/s codec that uses newamp1 fixed rate VQ of amplitudes.
Encodes 320 speech samples (40ms of speech) into 28 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: nothing
frame 1: nothing
frame 2: nothing
frame 3: 18 bit 2 stage VQ (9 bits/stage), 4 bits energy,
6 bit scalar Wo/voicing. No spare bits.
Voicing is encoded using the 0 index of the Wo quantiser.
The bit allocation is:
Parameter frames 1-3 frame 4 Total
-----------------------------------------------------------
Harmonic magnitudes (rate k VQ) 0 18 18
Energy 0 4 4
log Wo/voicing 0 6 6
TOTAL 0 28 28
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 700C payload written to "bits".
 *
 * All four frames are analysed, but only the model of the last one is
 * handed to newamp1_model_to_indexes().  The four resulting indexes are
 * packed with widths 9, 9, 4 and 6 bits, always in natural (non-Gray)
 * order.
 */
void codec2_encode_700c(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    static const int field_bits[4] = { 9, 9, 4, 6 };

    MODEL        model;
    int          indexes[4];
    int          frame, field;
    int          n_frames = 4;
    unsigned int nbit = 0;

    assert(c2 != NULL);

    /* clear the payload, rounded up to a whole number of bytes */
    memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));

    /* each call overwrites "model"; the last frame's result is kept */
    frame = 0;
    while (frame < n_frames) {
        analyse_one_frame(c2, &model, &speech[frame*c2->n_samp]);
        frame++;
    }

    int   K = 20;
    float mean;
    float rate_K_vec[K];
    float rate_K_vec_no_mean[K];
    float rate_K_vec_no_mean_[K];

    newamp1_model_to_indexes(&c2->c2const,
                             indexes,
                             &model,
                             rate_K_vec,
                             c2->rate_K_sample_freqs_kHz,
                             K,
                             &mean,
                             rate_K_vec_no_mean,
                             rate_K_vec_no_mean_);

    for (field = 0; field < 4; field++) {
        pack_natural_or_gray(bits, &nbit, indexes[field], field_bits[field], 0);
    }

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_decode_700c
AUTHOR......: David Rowe
DATE CREATED: August 2015
Decodes frames of 28 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
/*
 * Decode one 700C payload ("bits") into 4 * c2->n_samp samples written to
 * "speech".
 *
 * Four indexes of 9, 9, 4 and 6 bits are unpacked in natural (non-Gray)
 * order and handed to newamp1_indexes_to_model(), which fills the four
 * frame models and the HH array used for synthesis.
 */
void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    static const int field_bits[4] = { 9, 9, 4, 6 };

    MODEL        model[4];
    int          indexes[4];
    int          field, frame;
    unsigned int nbit = 0;

    assert(c2 != NULL);

    /* unpack bits from channel ------------------------------------*/
    for (field = 0; field < 4; field++) {
        indexes[field] = unpack_natural_or_gray(bits, &nbit, field_bits[field], 0);
    }

    int   M = 4;
    COMP  HH[M][MAX_AMP+1];
    float interpolated_surface_[M][NEWAMP1_K];

    newamp1_indexes_to_model(&c2->c2const,
                             model,
                             (COMP*)HH,
                             (float*)interpolated_surface_,
                             c2->prev_rate_K_vec_,
                             &c2->Wo_left,
                             &c2->voicing_left,
                             c2->rate_K_sample_freqs_kHz,
                             NEWAMP1_K,
                             c2->phase_fft_fwd_cfg,
                             c2->phase_fft_inv_cfg,
                             indexes);

    /* 700C is a little quieter, so apply some experimentally derived
       audio gain (1.5) while synthesising each 10ms frame */
    frame = 0;
    while (frame < M) {
        synthesise_one_frame(c2, &speech[c2->n_samp*frame], &model[frame], HH[frame], 1.5);
        frame++;
    }
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_energy_700c
AUTHOR......: Jeroen Vreeken
DATE CREATED: Jan 2017
Decodes energy value from encoded bits.
\*---------------------------------------------------------------------------*/
/*
 * Derive an energy value from an encoded 700C frame.
 *
 * The third unpacked index selects an entry of newamp1_energy_cb[0].cb;
 * 10 is subtracted from it, and a further 10 when the fourth index is 0.
 * The result is returned as POW10F(value/10.0).
 */
float codec2_energy_700c(struct CODEC2 *c2, const unsigned char * bits)
{
    static const int field_bits[4] = { 9, 9, 4, 6 };

    int          indexes[4];
    int          field;
    unsigned int nbit = 0;
    float        mean;

    assert(c2 != NULL);

    /* unpack bits from channel ------------------------------------*/
    for (field = 0; field < 4; field++) {
        indexes[field] = unpack_natural_or_gray(bits, &nbit, field_bits[field], 0);
    }

    mean = newamp1_energy_cb[0].cb[indexes[2]];
    mean -= 10;

    /* an additional 10 is removed when the Wo/voicing index is 0 */
    if (indexes[3] == 0) {
        mean -= 10;
    }

    return POW10F(mean/10.0);
}
#ifndef CORTEX_M4
/*
 * Derive an energy value from an encoded 450 frame.
 *
 * Three fields of 9, 3 and 6 bits are unpacked; the 3 bit field selects an
 * entry of newamp2_energy_cb[0].cb.  10 is subtracted from it, and a
 * further 10 when the 6 bit field is 0.  The result is returned as
 * POW10F(value/10.0).
 */
float codec2_energy_450(struct CODEC2 *c2, const unsigned char * bits)
{
    int          indexes[4];
    unsigned int nbit = 0;
    float        mean;

    assert(c2 != NULL);

    /* unpack bits from channel ------------------------------------*/
    /* indexes[1] is left unassigned here and is never read below */
    indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
    indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
    indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);

    mean = newamp2_energy_cb[0].cb[indexes[2]];
    mean -= 10;

    /* an additional 10 is removed when the last index is 0 */
    if (indexes[3] == 0) {
        mean -= 10;
    }

    return POW10F(mean/10.0);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_get_energy()
AUTHOR......: Jeroen Vreeken
DATE CREATED: 08/03/2016
Extract energy value from an encoded frame.
\*---------------------------------------------------------------------------*/
/*
 * Extract the energy value from an encoded frame without decoding speech.
 *
 * c2   - codec state; c2->mode selects how the frame is parsed and
 *        c2->gray is used by the modes that unpack with
 *        unpack_natural_or_gray().
 * bits - encoded frame for that mode.
 *
 * For each mode "nbit" is set to the number of bits that precede the
 * energy (or joint Wo/energy) field before that field is unpacked.
 *
 * Returns the decoded energy.  If c2->mode matches none of the handled
 * modes - only reachable when assertions are compiled out - 0 is returned
 * instead of an indeterminate value.
 */
float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits)
{
    assert(c2 != NULL);
    assert(
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) ||
        ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))
        );

    MODEL model;
    float xq_dec[2] = {0};      /* scratch state handed to decode_WoE() */
    int e_index, WoE_index;
    float e = 0.0f;             /* defined result even if no branch below runs */
    unsigned int nbit = 0;

    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_3200, c2->mode)) {
        nbit = 1 + 1 + WO_BITS;
        e_index = unpack(bits, &nbit, E_BITS);
        e = decode_energy(e_index, E_BITS);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_2400, c2->mode)) {
        nbit = 1 + 1;
        WoE_index = unpack(bits, &nbit, WO_E_BITS);
        decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1600, c2->mode)) {
        nbit = 1 + 1 + WO_BITS;
        e_index = unpack(bits, &nbit, E_BITS);
        e = decode_energy(e_index, E_BITS);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) {
        nbit = 1 + 1;
        WoE_index = unpack(bits, &nbit, WO_E_BITS);
        decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) {
        nbit = 1 + 1 + 1 + 1 + WO_BITS;
        e_index = unpack_natural_or_gray(bits, &nbit, E_BITS, c2->gray);
        e = decode_energy(e_index, E_BITS);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) {
        nbit = 1 + 1;
        WoE_index = unpack(bits, &nbit, WO_E_BITS);
        decode_WoE(&c2->c2const, &model, &e, xq_dec, WoE_index);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700, c2->mode)) {
        nbit = 1 + 5;
        e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
        e = decode_energy(e_index, 3);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700B, c2->mode)) {
        nbit = 1 + 5;
        e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
        e = decode_energy(e_index, 3);
    }
    /* the newamp based modes have their own energy helpers */
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
        e = codec2_energy_700c(c2, bits);
    }
    if ( CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
        e = codec2_energy_450(c2, bits);
    }

    return e;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_encode_450
AUTHOR......: Thomas Kurin and Stefan Erhardt
INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
DATE CREATED: July 2018
450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes.
Encodes 320 speech samples (40ms of speech) into 18 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm four times:
frame 0: nothing
frame 1: nothing
frame 2: nothing
frame 3: 9 bit 1 stage VQ, 3 bits energy,
6 bit scalar Wo/voicing/plosive. No spare bits.
If a plosive is detected the frame at the energy-step is encoded.
Voicing is encoded using the 000000 index of the Wo quantiser.
Plosive is encoded using the 111111 index of the Wo quantiser.
The bit allocation is:
Parameter frames 1-3 frame 4 Total
-----------------------------------------------------------
Harmonic magnitudes (rate k VQ) 0 9 9
Energy 0 3 3
log Wo/voicing/plosive 0 6 6
TOTAL 0 18 18
\*---------------------------------------------------------------------------*/
/*
 * Encode four consecutive 10ms frames (4 * c2->n_samp samples taken from
 * "speech") into one 450 payload written to "bits".
 *
 * A level in dB is computed for every frame and compared with the level of
 * the frame before it (c2->energy_prev for the first frame) to detect a
 * plosive.  If the step is found in one of the first three frames, that
 * frame is analysed again so that its model is the one that gets encoded.
 * Three indexes of 9, 3 and 6 bits are packed in natural (non-Gray) order.
 * c2->energy_prev is updated with the level of the fourth frame.
 */
void codec2_encode_450(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
    MODEL        model;
    int          indexes[4];
    int          frame, harm;
    int          M = 4;
    unsigned int nbit = 0;
    int          plosiv = 0;
    float        energydelta[M];
    int          spectralCounter;

    assert(c2 != NULL);

    /* clear the payload, rounded up to a whole number of bytes */
    memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));

    for (frame = 0; frame < M; frame++) {
        analyse_one_frame(c2, &model, &speech[frame*c2->n_samp]);

        energydelta[frame] = 0;
        spectralCounter = 0;

        /* NOTE(review): every term added below reads model.A[10], not
           model.A[harm], so the sum is the same value added once per
           counted harmonic - confirm whether A[harm] was intended. */
        harm = 0;
        while (harm < (model.L)) {
            //only detect above 300 Hz
            if (harm*model.Wo*(c2->c2const.Fs/2000.0)/M_PI > 0.3) {
                energydelta[frame] = energydelta[frame] + 20.0*log10(model.A[10]+1E-16);
                spectralCounter = spectralCounter+1;
            }
            harm++;
        }

        energydelta[frame] = energydelta[frame] / spectralCounter ;
    }

    //Constants for plosive Detection tdB = threshold; minPwr = from below this level plosives have to rise
    float tdB    = 15; //not fixed can be changed
    float minPwr = 15; //not fixed can be changed

    /* A plosive is flagged when the earlier level is below minPwr and the
       next one exceeds it by more than tdB.  "plosiv" ends up as the
       1-based number of the last frame where that happened. */
    if ((c2->energy_prev)<minPwr && energydelta[0]>((c2->energy_prev)+tdB)) {
        plosiv = 1;
    }
    for (frame = 1; frame < 4; frame++) {
        if (energydelta[frame-1]<minPwr && energydelta[frame]>(energydelta[frame-1]+tdB)) {
            plosiv = frame + 1;
        }
    }

    /* "model" currently describes frame 4; for a step in frames 1..3 the
       frame in question is analysed again */
    if (plosiv != 0 && plosiv != 4) {
        analyse_one_frame(c2, &model, &speech[(plosiv-1)*c2->n_samp]);
    }

    c2->energy_prev = energydelta[3];

    int   K = 29;
    float mean;
    float rate_K_vec[K];
    float rate_K_vec_no_mean[K];
    float rate_K_vec_no_mean_[K];

    /* the quantiser only receives a yes/no plosive flag */
    if (plosiv > 0) {
        plosiv = 1;
    }

    newamp2_model_to_indexes(&c2->c2const,
                             indexes,
                             &model,
                             rate_K_vec,
                             c2->n2_rate_K_sample_freqs_kHz,
                             K,
                             &mean,
                             rate_K_vec_no_mean,
                             rate_K_vec_no_mean_,
                             plosiv);

    /* indexes[1] is not transmitted in this mode */
    pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
    pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0);
    pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);

    /* every bit of the frame must have been produced */
    assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
/*---------------------------------------------------------------------------*\

  FUNCTION....: codec2_decode_450
  AUTHOR......: Thomas Kurin and Stefan Erhardt
  INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
  DATE CREATED: July 2018

  Decodes one packed 450 mode frame.

  Three indexes of 9, 3 and 6 bits (18 bits in total) are read from
  bits[] and handed to newamp2_indexes_to_model(), which fills four
  MODEL structures and the matching rows of HH.  Each of the four
  models is then synthesised, so 4 * c2->n_samp samples are written to
  speech[].

  c2     - codec state, must not be NULL
  speech - output buffer, must hold at least 4 * c2->n_samp samples
  bits   - packed input frame

\*---------------------------------------------------------------------------*/

void codec2_decode_450(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL         model[4];
    /* indexes[1] is not carried in the bit stream (only slots 0, 2 and 3
       are unpacked below), so start from an all-zero array instead of
       handing an indeterminate value to newamp2_indexes_to_model() */
    int           indexes[4] = {0};
    int           i;
    unsigned int  nbit = 0;

    assert(c2 != NULL);

    /* unpack bits from channel ------------------------------------*/

    indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
    indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
    indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);

    int   M = 4;
    COMP  HH[M][MAX_AMP+1];
    float interpolated_surface_[M][NEWAMP2_K];
    int   pwbFlag = 0;

    newamp2_indexes_to_model(&c2->c2const,
                             model,
                             (COMP*)HH,
                             (float*)interpolated_surface_,
                             c2->n2_prev_rate_K_vec_,
                             &c2->Wo_left,
                             &c2->voicing_left,
                             c2->n2_rate_K_sample_freqs_kHz,
                             NEWAMP2_K,
                             c2->phase_fft_fwd_cfg,
                             c2->phase_fft_inv_cfg,
                             indexes,
                             1.5,
                             pwbFlag);

    /* HH[i] is passed as the Aw argument of synthesise_one_frame() */
    for(i=0; i<M; i++) {
        synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
    }
}
/*---------------------------------------------------------------------------*\

  FUNCTION....: codec2_decode_450pwb
  AUTHOR......: Thomas Kurin and Stefan Erhardt
  INSTITUTE...: Institute for Electronics Engineering, University of Erlangen-Nuremberg
  DATE CREATED: July 2018

  Decodes the 450 codec data in pseudo wideband at 16kHz samplerate.

  Three indexes of 9, 3 and 6 bits (18 bits in total) are read from
  bits[] and handed to newamp2_indexes_to_model() together with the
  pseudo wideband state (c2->n2_pwb_* and NEWAMP2_16K_K) and the
  wideband flag set.  Each of the four resulting models is then
  synthesised, so 4 * c2->n_samp samples are written to speech[].

  c2     - codec state, must not be NULL
  speech - output buffer, must hold at least 4 * c2->n_samp samples
  bits   - packed input frame

\*---------------------------------------------------------------------------*/

void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], const unsigned char * bits)
{
    MODEL         model[4];
    /* indexes[1] is not carried in the bit stream (only slots 0, 2 and 3
       are unpacked below), so start from an all-zero array instead of
       handing an indeterminate value to newamp2_indexes_to_model() */
    int           indexes[4] = {0};
    int           i;
    unsigned int  nbit = 0;

    assert(c2 != NULL);

    /* unpack bits from channel ------------------------------------*/

    indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
    indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
    indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);

    int   M = 4;
    COMP  HH[M][MAX_AMP+1];
    float interpolated_surface_[M][NEWAMP2_16K_K];
    int   pwbFlag = 1;

    newamp2_indexes_to_model(&c2->c2const,
                             model,
                             (COMP*)HH,
                             (float*)interpolated_surface_,
                             c2->n2_pwb_prev_rate_K_vec_,
                             &c2->Wo_left,
                             &c2->voicing_left,
                             c2->n2_pwb_rate_K_sample_freqs_kHz,
                             NEWAMP2_16K_K,
                             c2->phase_fft_fwd_cfg,
                             c2->phase_fft_inv_cfg,
                             indexes,
                             1.5,
                             pwbFlag);

    /* HH[i] is passed as the Aw argument of synthesise_one_frame() */
    for(i=0; i<M; i++) {
        synthesise_one_frame(c2, &speech[c2->n_samp*i], &model[i], &HH[i][0], 1.5);
    }
}
#endif
/*---------------------------------------------------------------------------*\

  FUNCTION....: synthesise_one_frame()
  AUTHOR......: David Rowe
  DATE CREATED: 23/8/2010

  Synthesise one frame (c2->n_samp samples) of speech from model
  parameters.  The frame is built in c2->Sn_, scaled by gain, passed
  through ear_protection() and finally clipped to +/-32767 while being
  copied to speech[].

\*---------------------------------------------------------------------------*/

void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain)
{
    const int n_samp = c2->n_samp;
    const int rate_L_phase_ready =
           CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)
        || CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)
        || CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode);
    COMP  H_lpc[MAX_AMP+1];
    COMP *H;
    int   k;

    if (rate_L_phase_ready) {
        /* newamp1/2, we've already worked out rate L phase */
        H = Aw;
    } else {
        /* LPC based phase synthesis */
        sample_phase(model, H_lpc, Aw);
        H = H_lpc;
    }
    phase_synth_zero_order(n_samp, model, &c2->ex_phase, H);

    postfilter(model, &c2->bg_est);
    synthesise(n_samp, c2->fftr_inv_cfg, c2->Sn_, model, c2->Pn, 1);

    /* apply the caller's gain before the level limiter sees the frame */
    for(k=0; k<n_samp; k++) {
        c2->Sn_[k] *= gain;
    }

    ear_protection(c2->Sn_, n_samp);

    /* hard clip to the symmetric 16 bit range on the way out */
    for(k=0; k<n_samp; k++) {
        float s = c2->Sn_[k];

        if (s > 32767.0) {
            speech[k] = 32767;
        } else if (s < -32767.0) {
            speech[k] = -32767;
        } else {
            speech[k] = s;
        }
    }
}
/*---------------------------------------------------------------------------*\

  FUNCTION....: analyse_one_frame()
  AUTHOR......: David Rowe
  DATE CREATED: 23/8/2010

  Extract sinusoidal model parameters from one frame (c2->n_samp
  samples) of speech.  The new samples are appended to the c2->Sn
  analysis buffer, which holds c2->m_pitch samples, and the result is
  written to *model.

\*---------------------------------------------------------------------------*/

void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
{
    COMP      Sw[FFT_ENC];
    float     pitch;
    int       k;
    const int n_new  = c2->n_samp;
    const int n_kept = c2->m_pitch - n_new;   /* older samples that stay */

    /* Read input speech: slide the buffer left by one frame ... */

    for(k=0; k<n_kept; k++) {
        c2->Sn[k] = c2->Sn[k+n_new];
    }

    /* ... and place the new frame at the end */

    for(k=0; k<n_new; k++) {
        c2->Sn[n_kept+k] = speech[k];
    }

    dft_speech(&c2->c2const, c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);

    /* Estimate pitch */

    nlp(c2->nlp, c2->Sn, n_new, &pitch, Sw, c2->W, &c2->prev_f0_enc);

    model->Wo = TWO_PI/pitch;
    model->L  = PI/model->Wo;

    /* estimate model parameters */

    two_stage_pitch_refinement(&c2->c2const, model, Sw);
    estimate_amplitudes(model, Sw, c2->W, 0);
    est_voicing_mbe(&c2->c2const, model, Sw, c2->W);

#ifdef DUMP
    dump_model(model);
#endif
}
/*---------------------------------------------------------------------------*\

  FUNCTION....: ear_protection()
  AUTHOR......: David Rowe
  DATE CREATED: Nov 7 2012

  Limits output level to protect ears when there are bit errors or the input
  is overdriven.  This doesn't correct or mask bit errors, just reduces the
  worst of their damage.

  The peak is the largest sample magnitude in the frame, so negative
  excursions are limited exactly like positive ones.  When the peak
  exceeds the 30000 set point the whole frame is scaled in place by
  1/over^2, where over = peak/30000.

  in_out - frame of n samples, modified in place
  n      - number of samples; nothing happens when n <= 0

\*---------------------------------------------------------------------------*/

static void ear_protection(float in_out[], int n) {
    float max_sample, over, gain;
    int   i;

    /* find maximum sample magnitude in frame */

    max_sample = 0.0;
    for(i=0; i<n; i++) {
        float mag = (in_out[i] < 0.0f) ? -in_out[i] : in_out[i];

        if (mag > max_sample)
            max_sample = mag;
    }

    /* determine how far above set point */

    over = max_sample/30000.0;

    /* If we are x dB over set point we reduce level by 2x dB, this
       attenuates major excursions in amplitude (likely to be caused
       by bit errors) more than smaller ones */

    if (over > 1.0) {
        gain = 1.0/(over*over);
        for(i=0; i<n; i++)
            in_out[i] *= gain;
    }
}
void codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma)
{
assert((beta >= 0.0) && (beta <= 1.0));
assert((gamma >= 0.0) && (gamma <= 1.0));
c2->lpc_pf = enable;
c2->bass_boost = bass_boost;
c2->beta = beta;
c2->gamma = gamma;
}
/*
   Allows optional stealing of one of the voicing bits for use as a
   spare bit.  Experimental method of sending voice/data frames for
   FreeDV.

   Returns the index of the spare bit within the frame for the modes
   listed below, or -1 for any other mode.
*/
int codec2_get_spare_bit_index(struct CODEC2 *c2)
{
    int spare_bit = -1;   /* result for modes without a spare bit */

    assert(c2 != NULL);

    switch(c2->mode) {
    case CODEC2_MODE_1300:
        spare_bit = 2;    /* bit 2 (3rd bit) is v2 (third voicing bit) */
        break;
    case CODEC2_MODE_1400:
        spare_bit = 10;   /* bit 10 (11th bit) is v2 (third voicing bit) */
        break;
    case CODEC2_MODE_1600:
        spare_bit = 15;   /* bit 15 (16th bit) is v2 (third voicing bit) */
        break;
    case CODEC2_MODE_700:
        spare_bit = 26;   /* bits 26 and 27 are spare */
        break;
    case CODEC2_MODE_700B:
        spare_bit = 27;   /* bit 27 is spare */
        break;
    default:
        break;
    }

    return spare_bit;
}
/*
   Reconstructs the spare voicing bit.  Note works on unpacked bits
   for convenience.

   For the 1300, 1400 and 1600 modes the spare bit is overwritten with
   the logical OR of unpacked_bits[1] and the bit that directly follows
   the spare bit, and 0 is returned.  Any other mode leaves
   unpacked_bits[] unchanged and returns -1.
*/
int codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[])
{
    int v1, v3;
    int spare;    /* index of the bit to rebuild */

    assert(c2 != NULL);

    v1 = unpacked_bits[1];

    switch(c2->mode) {
    case CODEC2_MODE_1300:
        spare = 2;
        break;
    case CODEC2_MODE_1400:
        spare = 10;
        break;
    case CODEC2_MODE_1600:
        spare = 15;
        break;
    default:
        return -1;
    }

    /* in all three modes the following voicing bit sits right after
       the spare bit */
    v3 = unpacked_bits[spare+1];

    /* if either adjacent frame is voiced, make this one voiced */
    unpacked_bits[spare] = (v1 || v3);

    return 0;
}
/*
   Records the caller's natural/gray selection in c2->gray.
   c2 must not be NULL.
*/
void codec2_set_natural_or_gray(struct CODEC2 *c2, int gray)
{
    assert(c2 != NULL);

    c2->gray = gray;
}
/*
   Stores the softdec pointer in c2->softdec.  Only the pointer is
   kept; nothing is copied.  c2 must not be NULL.
*/
void codec2_set_softdec(struct CODEC2 *c2, float *softdec)
{
    assert(c2 != NULL);

    c2->softdec = softdec;
}
/*
   Opens filename for binary writing and stores the stream in
   codec2_state->flspEWov.  If the file cannot be opened an error is
   printed to stderr and the process exits with status 1.
*/
void codec2_open_lspEWov(struct CODEC2 *codec2_state, char *filename) {
    FILE *feature_file = fopen(filename, "wb");

    codec2_state->flspEWov = feature_file;
    if (feature_file != NULL) {
        return;
    }

    fprintf(stderr, "error opening feature file: %s\n", filename);
    exit(1);
}