/* Scraped from a repository web view: root/libavcodec/aacenc_pred.c (plain),
 * blob d111192f0603447ac12352f870369d41926df20a */
/*
 * AAC encoder main-type prediction
 * Copyright (C) 2015 Rostislav Pehlivanov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
/**
 * @file
 * AAC encoder main-type prediction
 * @author Rostislav Pehlivanov ( atomnuker gmail com )
 */
27
28#include "aactab.h"
29#include "aacenc_pred.h"
30#include "aacenc_utils.h"
31#include "aacenc_is.h" /* <- Needed for common window distortions */
32#include "aacenc_quantization.h"
33
/**
 * Undo a per-band prediction decision: when prediction is flagged for band
 * sfb of sce, clear the flag and put back the band type saved in band_alt.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else. Both arguments
 * are evaluated more than once and must therefore be free of side effects.
 */
#define RESTORE_PRED(sce, sfb) \
    do {\
        if ((sce)->ics.prediction_used[(sfb)]) {\
            (sce)->ics.prediction_used[(sfb)] = 0;\
            (sce)->band_type[(sfb)] = (sce)->band_alt[(sfb)];\
        }\
    } while (0)
39
40static inline float flt16_round(float pf)
41{
42 union av_intfloat32 tmp;
43 tmp.f = pf;
44 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
45 return tmp.f;
46}
47
48static inline float flt16_even(float pf)
49{
50 union av_intfloat32 tmp;
51 tmp.f = pf;
52 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
53 return tmp.f;
54}
55
56static inline float flt16_trunc(float pf)
57{
58 union av_intfloat32 pun;
59 pun.f = pf;
60 pun.i &= 0xFFFF0000U;
61 return pun.f;
62}
63
64static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
65{
66 float k2;
67 const float a = 0.953125; // 61.0 / 64
68 const float alpha = 0.90625; // 29.0 / 32
69 const float k1 = ps->k1;
70 const float r0 = ps->r0, r1 = ps->r1;
71 const float cor0 = ps->cor0, cor1 = ps->cor1;
72 const float var0 = ps->var0, var1 = ps->var1;
73 const float e0 = *coef - ps->x_est;
74 const float e1 = e0 - k1 * r0;
75
76 if (set)
77 *coef = e0;
78
79 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
80 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
81 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
82 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
83 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
84 ps->r0 = flt16_trunc(a * e0);
85
86 /* Prediction for next frame */
87 ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
88 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
89 *rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
90}
91
92static inline void reset_predict_state(PredictorState *ps)
93{
94 ps->r0 = 0.0f;
95 ps->r1 = 0.0f;
96 ps->k1 = 0.0f;
97 ps->cor0 = 0.0f;
98 ps->cor1 = 0.0f;
99 ps->var0 = 1.0f;
100 ps->var1 = 1.0f;
101 ps->x_est = 0.0f;
102}
103
104static inline void reset_all_predictors(PredictorState *ps)
105{
106 int i;
107 for (i = 0; i < MAX_PREDICTORS; i++)
108 reset_predict_state(&ps[i]);
109}
110
111static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
112{
113 int i;
114 PredictorState *ps = sce->predictor_state;
115 for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
116 reset_predict_state(&ps[i]);
117}
118
119void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
120{
121 int sfb, k;
122 const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
123
124 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
125 for (sfb = 0; sfb < pmax; sfb++) {
126 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
127 predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
128 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
129 }
130 }
131 if (sce->ics.predictor_reset_group) {
132 reset_predictor_group(sce, sce->ics.predictor_reset_group);
133 }
134 } else {
135 reset_all_predictors(sce->predictor_state);
136 }
137}
138
139/* If inc = 0 you can check if this returns 0 to see if you can reset freely */
140static inline int update_counters(IndividualChannelStream *ics, int inc)
141{
142 int i;
143 for (i = 1; i < 31; i++) {
144 ics->predictor_reset_count[i] += inc;
145 if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
146 return i; /* Reset this immediately */
147 }
148 return 0;
149}
150
151void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
152{
153 int start, w, w2, g, i, count = 0;
154 SingleChannelElement *sce0 = &cpe->ch[0];
155 SingleChannelElement *sce1 = &cpe->ch[1];
156 const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
157 const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
158 const int pmax = FFMIN(pmax0, pmax1);
159
160 if (!cpe->common_window ||
161 sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
162 sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
163 return;
164
165 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
166 start = 0;
167 for (g = 0; g < sce0->ics.num_swb; g++) {
168 int sfb = w*16+g;
169 int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
170 float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
171 struct AACISError ph_err1, ph_err2, *erf;
172 if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
173 RESTORE_PRED(sce0, sfb);
174 RESTORE_PRED(sce1, sfb);
175 start += sce0->ics.swb_sizes[g];
176 continue;
177 }
178 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
179 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
180 float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
181 float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
182 ener0 += coef0*coef0;
183 ener1 += coef1*coef1;
184 ener01 += (coef0 + coef1)*(coef0 + coef1);
185 }
186 }
187 ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
188 ener0, ener1, ener01, 1, -1);
189 ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
190 ener0, ener1, ener01, 1, +1);
191 erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
192 if (erf->pass) {
193 sce0->ics.prediction_used[sfb] = 1;
194 sce1->ics.prediction_used[sfb] = 1;
195 count++;
196 } else {
197 RESTORE_PRED(sce0, sfb);
198 RESTORE_PRED(sce1, sfb);
199 }
200 start += sce0->ics.swb_sizes[g];
201 }
202 }
203
204 sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
205}
206
207static void update_pred_resets(SingleChannelElement *sce)
208{
209 int i, max_group_id_c, max_frame = 0;
210 float avg_frame = 0.0f;
211 IndividualChannelStream *ics = &sce->ics;
212
213 /* Update the counters and immediately update any frame behind schedule */
214 if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
215 return;
216
217 for (i = 1; i < 31; i++) {
218 /* Count-based */
219 if (ics->predictor_reset_count[i] > max_frame) {
220 max_group_id_c = i;
221 max_frame = ics->predictor_reset_count[i];
222 }
223 avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
224 }
225
226 if (max_frame > PRED_RESET_MIN) {
227 ics->predictor_reset_group = max_group_id_c;
228 } else {
229 ics->predictor_reset_group = 0;
230 }
231}
232
233void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
234{
235 int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
236 const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
237 float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
238 float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
239 float *QERR = &s->scoefs[128*4];
240
241 if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
242 sce->ics.predictor_present = 0;
243 return;
244 }
245
246 if (!sce->ics.predictor_initialized) {
247 reset_all_predictors(sce->predictor_state);
248 sce->ics.predictor_initialized = 1;
249 memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
250 for (i = 1; i < 31; i++)
251 sce->ics.predictor_reset_count[i] = i;
252 }
253
254 update_pred_resets(sce);
255 memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
256
257 for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
258 int cost1, cost2, cb_p;
259 float dist1, dist2, dist_spec_err = 0.0f;
260 const int cb_n = sce->zeroes[sfb] ? 0 : sce->band_type[sfb];
261 const int cb_min = sce->zeroes[sfb] ? 0 : 1;
262 const int cb_max = sce->zeroes[sfb] ? 0 : RESERVED_BT;
263 const int start_coef = sce->ics.swb_offset[sfb];
264 const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
265 const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
266
267 if (start_coef + num_coeffs > MAX_PREDICTORS ||
268 (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2) ||
269 sce->band_type[sfb] == NOISE_BT)
270 continue;
271
272 /* Normal coefficients */
273 s->abs_pow34(O34, &sce->coeffs[start_coef], num_coeffs);
274 dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
275 O34, num_coeffs, sce->sf_idx[sfb],
276 cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
277 cost_coeffs += cost1;
278
279 /* Encoded coefficients - needed for #bits, band type and quant. error */
280 for (i = 0; i < num_coeffs; i++)
281 SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
282 s->abs_pow34(S34, SENT, num_coeffs);
283 if (cb_n < RESERVED_BT)
284 cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]), cb_min, cb_max);
285 else
286 cb_p = cb_n;
287 quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
288 sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
289 &cost2, NULL, 0);
290
291 /* Reconstructed coefficients - needed for distortion measurements */
292 for (i = 0; i < num_coeffs; i++)
293 sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
294 s->abs_pow34(P34, &sce->prcoeffs[start_coef], num_coeffs);
295 if (cb_n < RESERVED_BT)
296 cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]), cb_min, cb_max);
297 else
298 cb_p = cb_n;
299 dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
300 P34, num_coeffs, sce->sf_idx[sfb],
301 cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
302 for (i = 0; i < num_coeffs; i++)
303 dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
304 dist_spec_err *= s->lambda / band->threshold;
305 dist2 += dist_spec_err;
306
307 if (dist2 <= dist1 && cb_p <= cb_n) {
308 cost_pred += cost2;
309 sce->ics.prediction_used[sfb] = 1;
310 sce->band_alt[sfb] = cb_n;
311 sce->band_type[sfb] = cb_p;
312 count++;
313 } else {
314 cost_pred += cost1;
315 sce->band_alt[sfb] = cb_p;
316 }
317 }
318
319 if (count && cost_coeffs < cost_pred) {
320 count = 0;
321 for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
322 RESTORE_PRED(sce, sfb);
323 memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
324 }
325
326 sce->ics.predictor_present = !!count;
327}
328
329/**
330 * Encoder predictors data.
331 */
332void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
333{
334 int sfb;
335 IndividualChannelStream *ics = &sce->ics;
336 const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
337
338 if (s->profile != FF_PROFILE_AAC_MAIN ||
339 !ics->predictor_present)
340 return;
341
342 put_bits(&s->pb, 1, !!ics->predictor_reset_group);
343 if (ics->predictor_reset_group)
344 put_bits(&s->pb, 5, ics->predictor_reset_group);
345 for (sfb = 0; sfb < pmax; sfb++)
346 put_bits(&s->pb, 1, ics->prediction_used[sfb]);
347}
348