545 files changed, 230976 insertions, 0 deletions
diff --git a/audio_codec/libamr/dec_dtx.c b/audio_codec/libamr/dec_dtx.c new file mode 100644 index 0000000..f6c9caf --- a/dev/null +++ b/audio_codec/libamr/dec_dtx.c @@ -0,0 +1,683 @@ +/* + *=================================================================== + * 3GPP AMR Wideband Floating-point Speech Codec + *=================================================================== + */ +#include <stdlib.h> +#include <memory.h> +#include <math.h> +#include "typedef.h" +#include "dec_dtx.h" +#include "dec_lpc.h" +#include "dec_util.h" + + +#define MAX_31 (Word32)0x3FFFFFFF +#define L_FRAME 256 /* Frame size */ +#define RX_SPEECH_LOST 2 +#define RX_SPEECH_BAD 3 +#define RX_SID_FIRST 4 +#define RX_SID_UPDATE 5 +#define RX_SID_BAD 6 +#define RX_NO_DATA 7 +#define ISF_GAP 128 /* 50 */ +#define D_DTX_MAX_EMPTY_THRESH 50 +#define GAIN_FACTOR 75 +#define ISF_FACTOR_LOW 256 +#define ISF_FACTOR_STEP 2 +#define ISF_DITH_GAP 448 +#define D_DTX_HANG_CONST 7 /* yields eight frames of SP HANGOVER */ +#define D_DTX_ELAPSED_FRAMES_THRESH (24 + 7 - 1) +#define RANDOM_INITSEED 21845 /* own random init value */ + + +/* + * D_DTX_reset + * + * Parameters: + * st O: state struct + * + * Function: + * Initializes state memory + * + * Returns: + * non-zero with error, zero for ok + */ +int D_DTX_reset(D_DTX_State *st, const Word16 *isf_init) +{ + Word32 i; + + if (st == (D_DTX_State*)NULL) { + return(-1); + } + st->mem_since_last_sid = 0; + st->mem_true_sid_period_inv = (1 << 13); /* 0.25 in Q15 */ + st->mem_log_en = 3500; + st->mem_log_en_prev = 3500; + + /* low level noise for better performance in DTX handover cases */ + st->mem_cng_seed = RANDOM_INITSEED; + st->mem_hist_ptr = 0; + + /* Init isf_hist[] and decoder log frame energy */ + memcpy(st->mem_isf, isf_init, M * sizeof(Word16)); + memcpy(st->mem_isf_prev, isf_init, M * sizeof(Word16)); + + for (i = 0; i < D_DTX_HIST_SIZE; i++) { + memcpy(&st->mem_isf_buf[i * M], isf_init, M * sizeof(Word16)); + st->mem_log_en_buf[i] = 3500; + } + st->mem_dtx_hangover_count = D_DTX_HANG_CONST; + st->mem_dec_ana_elapsed_count = 127; + st->mem_sid_frame = 0; + st->mem_valid_data = 0; + st->mem_dtx_hangover_added = 0; + st->mem_dtx_global_state = SPEECH; + st->mem_data_updated = 0; + st->mem_dither_seed = RANDOM_INITSEED; + st->mem_cn_dith = 0; + + return(0); +} + + +/* + * D_DTX_init + * + * Parameters: + * st I/O: state struct + * + * Function: + * Allocates state memory and initializes state memory + * + * Returns: + * non-zero with error, zero for ok + */ +int D_DTX_init(D_DTX_State **st, const Word16 *isf_init) +{ + D_DTX_State *s; + + if (st == (D_DTX_State**)NULL) { + return(-1); + } + + *st = NULL; + + /* allocate memory */ + if ((s = (D_DTX_State*)malloc(sizeof(D_DTX_State))) == NULL) { + return(-1); + } + + D_DTX_reset(s, isf_init); + *st = s; + + return(0); +} + + +/* + * D_DTX_exit + * + * Parameters: + * state I/0: State struct + * + * Function: + * The memory used for state memory is freed + * + * Returns: + * void + */ +void D_DTX_exit(D_DTX_State **st) +{ + if (st == NULL || *st == NULL) { + return; + } + + /* deallocate memory */ + free(*st); + *st = NULL; + + return; +} + + +/* + * D_DTX_rx_handler + * + * Parameters: + * st I/O: State struct + * frame_type I: Frame type + * + * Function: + * Analyze received frame + * + * Table of new SPD synthesis states + * + * | previous SPD_synthesis_state + * Incoming | + * frame_type | SPEECH | DTX | D_DTX_MUTE + * --------------------------------------------------------------- + * RX_SPEECH_GOOD , | | | + * RX_SPEECH_PR_DEGRADED | SPEECH | SPEECH | SPEECH + * ---------------------------------------------------------------- + * RX_SPEECH_BAD, | SPEECH | DTX | D_DTX_MUTE + * ---------------------------------------------------------------- + * RX_SID_FIRST, | DTX | DTX/(D_DTX_MUTE)| D_DTX_MUTE + * ---------------------------------------------------------------- + * RX_SID_UPDATE, | DTX | DTX | DTX + * ---------------------------------------------------------------- + * RX_SID_BAD, | DTX | DTX/(D_DTX_MUTE)| D_DTX_MUTE + * ---------------------------------------------------------------- + * RX_NO_DATA, | SPEECH | DTX/(D_DTX_MUTE)| D_DTX_MUTE + * RX_SPARE |(class2 garb.)| | + * ---------------------------------------------------------------- + * + * Returns: + * new state + */ +UWord8 D_DTX_rx_handler(D_DTX_State *st, UWord8 frame_type) +{ + UWord8 newState; + UWord8 encState; + + /* DTX if SID frame or previously in DTX{_MUTE} + * and (NO_RX OR BAD_SPEECH) + */ + if ((frame_type == RX_SID_FIRST) | (frame_type == RX_SID_UPDATE) | + (frame_type == RX_SID_BAD) | (((st->mem_dtx_global_state == DTX) | + (st->mem_dtx_global_state == D_DTX_MUTE)) & ((frame_type == RX_NO_DATA) | + (frame_type == RX_SPEECH_BAD) | (frame_type == RX_SPEECH_LOST)))) { + newState = DTX; + + /* stay in mute for these input types */ + if ((st->mem_dtx_global_state == D_DTX_MUTE) & + ((frame_type == RX_SID_BAD) | (frame_type == RX_SID_FIRST) | + (frame_type == RX_SPEECH_LOST) | (frame_type == RX_NO_DATA))) { + newState = D_DTX_MUTE; + } + + /* evaluate if noise parameters are too old */ + /* since_last_sid is reset when CN parameters have been updated */ + st->mem_since_last_sid = D_UTIL_saturate(st->mem_since_last_sid + 1); + + /* no update of sid parameters in DTX for a Word32 while */ + if ((frame_type != RX_SID_UPDATE) && + (st->mem_since_last_sid > D_DTX_MAX_EMPTY_THRESH)) { + newState = D_DTX_MUTE; + } + } else { + newState = SPEECH; + st->mem_since_last_sid = 0; + } + + /* + * reset the decAnaElapsed Counter when receiving CNI data the first + * time, to robustify counter missmatch after handover + * this might delay the bwd CNI analysis in the new decoder slightly. + */ + if ((st->mem_data_updated == 0) & (frame_type == RX_SID_UPDATE)) { + st->mem_dec_ana_elapsed_count = 0; + } + + /* + * update the SPE-SPD DTX hangover synchronization + * to know when SPE has added dtx hangover + */ + st->mem_dec_ana_elapsed_count++; + + /* saturate */ + if (st->mem_dec_ana_elapsed_count > 127) { + st->mem_dec_ana_elapsed_count = 127; + } + + st->mem_dtx_hangover_added = 0; + + if ((frame_type == RX_SID_FIRST) | (frame_type == RX_SID_UPDATE) | + (frame_type == RX_SID_BAD) | (frame_type == RX_NO_DATA)) { + encState = DTX; + } else { + encState = SPEECH; + } + + if (encState == SPEECH) { + st->mem_dtx_hangover_count = D_DTX_HANG_CONST; + } else { + if (st->mem_dec_ana_elapsed_count > D_DTX_ELAPSED_FRAMES_THRESH) { + st->mem_dtx_hangover_added = 1; + st->mem_dec_ana_elapsed_count = 0; + st->mem_dtx_hangover_count = 0; + } else if (st->mem_dtx_hangover_count == 0) { + st->mem_dec_ana_elapsed_count = 0; + } else { + st->mem_dtx_hangover_count--; + } + } + + if (newState != SPEECH) { + /* + * DTX or D_DTX_MUTE + * CN data is not in a first SID, first SIDs are marked as SID_BAD + * but will do backwards analysis if a hangover period has been added + * according to the state machine above + */ + st->mem_sid_frame = 0; + st->mem_valid_data = 0; + + if (frame_type == RX_SID_FIRST) { + st->mem_sid_frame = 1; + } else if (frame_type == RX_SID_UPDATE) { + st->mem_sid_frame = 1; + st->mem_valid_data = 1; + } else if (frame_type == RX_SID_BAD) { + st->mem_sid_frame = 1; + st->mem_dtx_hangover_added = 0; /* use old data */ + } + } + + return newState; + + /* newState is used by both SPEECH AND DTX synthesis routines */ +} + + +/* + * D_DTX_cn_dithering + * + * Parameters: + * isf I/O: CN ISF vector + * L_log_en_int I/O: energy parameter + * dither_seed I/O: random seed + * + * Function: + * Confort noise dithering + * + * Returns: + * void + */ +static void D_DTX_cn_dithering(Word16 isf[M], Word32 *L_log_en_int, + Word16 *dither_seed) +{ + Word32 temp, temp1, i, dither_fac, rand_dith, rand_dith2; + + /* Insert comfort noise dithering for energy parameter */ + rand_dith = D_UTIL_random(dither_seed) >> 1; + rand_dith2 = D_UTIL_random(dither_seed) >> 1; + rand_dith = rand_dith + rand_dith2; + *L_log_en_int = *L_log_en_int + ((rand_dith * GAIN_FACTOR) << 1); + + if (*L_log_en_int < 0) { + *L_log_en_int = 0; + } + + /* Insert comfort noise dithering for spectral parameters (ISF-vector) */ + dither_fac = ISF_FACTOR_LOW; + rand_dith = D_UTIL_random(dither_seed) >> 1; + rand_dith2 = D_UTIL_random(dither_seed) >> 1; + rand_dith = rand_dith + rand_dith2; + temp = isf[0] + (((rand_dith * dither_fac) + 0x4000) >> 15); + + /* Make sure that isf[0] will not get negative values */ + if (temp < ISF_GAP) { + isf[0] = ISF_GAP; + } else { + isf[0] = (Word16)temp; + } + + for (i = 1; i < M - 1; i++) { + dither_fac = dither_fac + ISF_FACTOR_STEP; + rand_dith = D_UTIL_random(dither_seed) >> 1; + rand_dith2 = D_UTIL_random(dither_seed) >> 1; + rand_dith = rand_dith + rand_dith2; + temp = isf[i] + (((rand_dith * dither_fac) + 0x4000) >> 15); + temp1 = temp - isf[i - 1]; + + /* Make sure that isf spacing remains at least ISF_DITH_GAP Hz */ + if (temp1 < ISF_DITH_GAP) { + isf[i] = (Word16)(isf[i - 1] + ISF_DITH_GAP); + } else { + isf[i] = (Word16)temp; + } + } + + /* Make sure that isf[M-2] will not get values above 16384 */ + if (isf[M - 2] > 16384) { + isf[M - 2] = 16384; + } + + return; +} + + +/* + * D_DTX_exe + * + * Parameters: + * st I/O: state struct + * exc2 O: CN excitation + * new_state I: New DTX state + * prms I: Vector of synthesis parameters + * isf O: CN ISF vector + * + * Function: + * Confort noise generation + * + * Returns: + * void + */ +void D_DTX_exe(D_DTX_State *st, Word16 *exc2, Word16 new_state, Word16 isf[], + Word16 **prms) +{ + + Word32 i, j, L_tmp, ptr; + Word32 exp0, int_fac; + Word32 gain; + Word32 L_isf[M], L_log_en_int, level32, ener32; + Word16 log_en_index; + Word16 tmp_int_length; + Word16 exp, log_en_int_e, log_en_int_m, level; + + + /* + * This function is called if synthesis state is not SPEECH. + * The globally passed inputs to this function are + * st->sid_frame + * st->valid_data + * st->dtxHangoverAdded + * new_state (SPEECH, DTX, D_DTX_MUTE) + */ + if ((st->mem_dtx_hangover_added != 0) & (st->mem_sid_frame != 0)) { + /* sid_first after dtx hangover period + * or sid_upd after dtxhangover + * consider twice the last frame + */ + ptr = st->mem_hist_ptr + 1; + + if (ptr == D_DTX_HIST_SIZE) { + ptr = 0; + } + + memcpy(&st->mem_isf_buf[ptr * M], &st->mem_isf_buf[st->mem_hist_ptr * M], + M * sizeof(Word16)); + + st->mem_log_en_buf[ptr] = st->mem_log_en_buf[st->mem_hist_ptr]; + + /* compute mean log energy and isf from decoded signal (SID_FIRST) */ + st->mem_log_en = 0; + memset(L_isf, 0, M * sizeof(Word32)); + + /* average energy and isf */ + for (i = 0; i < D_DTX_HIST_SIZE; i++) { + /* + * Division by D_DTX_HIST_SIZE = 8 has been done in dtx_buffer log_en + * is in Q10 + */ + st->mem_log_en = (Word16)(st->mem_log_en + st->mem_log_en_buf[i]); + + for (j = 0; j < M; j++) { + L_isf[j] = L_isf[j] + st->mem_isf_buf[i * M + j]; + } + } + + /* st->log_en in Q9 */ + st->mem_log_en = (Word16)(st->mem_log_en >> 1); + + /* + * Add 2 in Q9, in order to have only positive values for Pow2 + * this value is subtracted back after Pow2 function + */ + st->mem_log_en = (Word16)(st->mem_log_en + 1024); + + if (st->mem_log_en < 0) { + st->mem_log_en = 0; + } + + for (j = 0; j < M; j++) { + st->mem_isf[j] = (Word16)(L_isf[j] >> 3); /* divide by 8 */ + } + } + + if (st->mem_sid_frame != 0) { + /* + * Set old SID parameters, always shift + * even if there is no new valid_data + */ + memcpy(st->mem_isf_prev, st->mem_isf, M * sizeof(Word16)); + st->mem_log_en_prev = st->mem_log_en; + + if (st->mem_valid_data != 0) { /* new data available (no CRC) */ + /* st->true_sid_period_inv = 1.0f/st->since_last_sid; */ + + /* + * Compute interpolation factor, since the division only works + * for values of since_last_sid < 32 we have to limit + * the interpolation to 32 frames + */ + tmp_int_length = st->mem_since_last_sid; + + if (tmp_int_length > 32) { + tmp_int_length = 32; + } + + if (tmp_int_length >= 2) { + st->mem_true_sid_period_inv = + (Word16)(0x2000000 / (tmp_int_length << 10)); + } else { + st->mem_true_sid_period_inv = 1 << 14; /* 0.5 it Q15 */ + } + + D_LPC_isf_noise_d(*prms, st->mem_isf); + (*prms) += 5; + log_en_index = *(*prms)++; + + /* read background noise stationarity information */ + st->mem_cn_dith = *(*prms)++; + + /* + * st->log_en = (Float32)log_en_index / 2.625 - 2.0; + * log2(E) in Q9 (log2(E) lies in between -2:22) + */ + st->mem_log_en = (Word16)(log_en_index << (15 - 6)); + + /* Divide by 2.625 */ + st->mem_log_en = (Word16)((st->mem_log_en * 12483) >> 15); + + /* + * Subtract 2 in Q9 is done later, after Pow2 function + * no interpolation at startup after coder reset + * or when SID_UPD has been received right after SPEECH + */ + if ((st->mem_data_updated == 0) || + (st->mem_dtx_global_state == SPEECH)) { + memcpy(st->mem_isf_prev, st->mem_isf, M * sizeof(Word16)); + st->mem_log_en_prev = st->mem_log_en; + } + } /* endif valid_data */ + } /* endif sid_frame */ + + if ((st->mem_sid_frame != 0) && (st->mem_valid_data != 0)) { + st->mem_since_last_sid = 0; + } + + /* Interpolate SID info */ + if (st->mem_since_last_sid < 32) { + int_fac = st->mem_since_last_sid << 10; /* Q10 */ + } else { + int_fac = 32767; + } + /* Q10 * Q15 -> Q10 */ + int_fac = (int_fac * st->mem_true_sid_period_inv) >> 15; + + /* Maximize to 1.0 in Q10 */ + if (int_fac > 1024) { + int_fac = 1024; + } + int_fac = int_fac << 4; /* Q10 -> Q14 */ + L_log_en_int = (int_fac * st->mem_log_en) << 1; /* Q14 * Q9 -> Q24 */ + + for (i = 0; i < M; i++) { + /* Q14 * Q15 -> Q14 */ + isf[i] = (Word16)((int_fac * st->mem_isf[i]) >> 15); + } + int_fac = 16384 - int_fac; /* 1-k in Q14 */ + + /* ( Q14 * Q9 -> Q24 ) + Q24 -> Q24 */ + L_log_en_int = L_log_en_int + ((int_fac * st->mem_log_en_prev) << 1); + + for (i = 0; i < M; i++) { + /* Q14 + (Q14 * Q15 -> Q14) -> Q14 */ + L_tmp = isf[i] + ((int_fac * st->mem_isf_prev[i]) >> 15); + isf[i] = (Word16)(L_tmp << 1); /* Q14 -> Q15 */ + } + + /* If background noise is non-stationary, insert comfort noise dithering */ + if (st->mem_cn_dith != 0) { + D_DTX_cn_dithering(isf, &L_log_en_int, &st->mem_dither_seed); + } + + /* L_log_en_int corresponds to log2(E)+2 in Q24, i.e log2(gain)+1 in Q25 */ + L_log_en_int = (L_log_en_int >> 9); /* Q25 -> Q16 */ + + /* Find integer part */ + log_en_int_e = (Word16)((L_log_en_int) >> 16); + + /* Find fractional part */ + log_en_int_m = (Word16)((L_log_en_int - (log_en_int_e << 16)) >> 1); + + /* + * Subtract 2 from L_log_en_int in Q9, + * i.e divide the gain by 2 (energy by 4) + * Add 16 in order to have the result of pow2 in Q16 + */ + log_en_int_e = (Word16)(log_en_int_e + (16 - 1)); + + /* level = (Float32)( pow( 2.0f, log_en ) ); */ + level32 = D_UTIL_pow2(log_en_int_e, log_en_int_m); /* Q16 */ + exp0 = D_UTIL_norm_l(level32); + level32 = (level32 << exp0); /* level in Q31 */ + exp0 = (15 - exp0); + level = (Word16)(level32 >> 16); /* level in Q15 */ + + /* generate white noise vector */ + for (i = 0; i < L_FRAME; i++) { + exc2[i] = (Word16)((D_UTIL_random(&(st->mem_cng_seed)) >> 4)); + } + + /* gain = level / sqrt(ener) * sqrt(L_FRAME) */ + /* energy of generated excitation */ + ener32 = D_UTIL_dot_product12(exc2, exc2, L_FRAME, &exp); + D_UTIL_normalised_inverse_sqrt(&ener32, &exp); + gain = ener32 >> 16; + gain = (level * gain) >> 15; /* gain in Q15 */ + + /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */ + exp = (Word16)(exp0 + exp + 4); + + if (exp >= 0) { + for (i = 0; i < L_FRAME; i++) { + L_tmp = (exc2[i] * gain) >> 15; /* Q0 * Q15 */ + exc2[i] = (Word16)(L_tmp << exp); + } + } else { + exp = (Word16) - exp; + + for (i = 0; i < L_FRAME; i++) { + L_tmp = (exc2[i] * gain) >> 15; /* Q0 * Q15 */ + exc2[i] = (Word16)(L_tmp >> exp); + } + } + + if (new_state == D_DTX_MUTE) { + /* + * mute comfort noise as it has been quite a long time since + * last SID update was performed + */ + tmp_int_length = st->mem_since_last_sid; + + if (tmp_int_length > 32) { + tmp_int_length = 32; + } + + /* safety guard against division by zero */ + if (tmp_int_length <= 0) { + tmp_int_length = 8; + } + st->mem_true_sid_period_inv = D_UTIL_saturate((0x02000000 / (tmp_int_length << 10))); + st->mem_since_last_sid = 0; + st->mem_log_en_prev = st->mem_log_en; + + /* subtract 1/8 in Q9 (energy), i.e -3/8 dB */ + st->mem_log_en = D_UTIL_saturate(st->mem_log_en - 64); + } + + /* reset interpolation length timer if data has been updated. */ + if ((st->mem_sid_frame != 0) && ((st->mem_valid_data != 0) || + ((st->mem_valid_data == 0) && (st->mem_dtx_hangover_added) != 0))) { + st->mem_since_last_sid = 0; + st->mem_data_updated = 1; + } + + return; +} + + +/* + * D_DTX_activity_update + * + * Parameters: + * st I/O: state struct + * isf O: ISF vector + * exc O: excitation + * + * Function: + * Confort noise generation + * + * Returns: + * void + */ +void D_DTX_activity_update(D_DTX_State *st, Word16 isf[], Word16 exc[]) +{ + + Word32 L_frame_en, log_en; + Word32 i; + Word16 log_en_e, log_en_m; + + st->mem_hist_ptr = (Word16)(st->mem_hist_ptr + 1); + + if (st->mem_hist_ptr == D_DTX_HIST_SIZE) { + st->mem_hist_ptr = 0; + } + + memcpy(&st->mem_isf_buf[st->mem_hist_ptr * M], isf, M * sizeof(Word16)); + + /* compute log energy based on excitation frame energy in Q0 */ + L_frame_en = 0; + + for (i = 0; i < L_FRAME; i++) { + L_frame_en = L_frame_en + (exc[i] * exc[i]); + if (L_frame_en > MAX_31) { + L_frame_en = MAX_31; + break; + } + } + + /* + * log_en = + * (Float32)log10(L_frame_en/(Float32)L_FRAME)/(Float32)log10(2.0f); + */ + D_UTIL_log2(L_frame_en, &log_en_e, &log_en_m); + + /* + * convert exponent and mantissa to Word16 Q7. + * Q7 is used to simplify averaging in dtx_enc + */ + log_en = log_en_e << 7; /* Q7 */ + log_en = log_en + (log_en_m >> (15 - 7)); + + /* Divide by L_FRAME = 256, i.e subtract 8 in Q7 = 1024 */ + log_en = log_en - 1024; + + /* insert into log energy buffer */ + st->mem_log_en_buf[st->mem_hist_ptr] = (Word16)log_en; + + return; +} |