blob: 8e1bc88a8538f41e71a1bd1e4168c78c04d27fa5
1 | /* |
2 | * AAC encoder twoloop coder |
3 | * Copyright (C) 2008-2009 Konstantin Shishkov |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | /** |
23 | * @file |
24 | * AAC encoder twoloop coder |
25 | * @author Konstantin Shishkov, Claudio Freire |
26 | */ |
27 | |
/**
 * This file contains a template for the twoloop coder function.
 * The including file must already have declared the following functions
 * from aacenc_quantization/util.h. They are not included explicitly here
 * so that alternative implementations can be provided:
 *  - quantize_band_cost
 *  - abs_pow34_v
 *  - find_max_val
 *  - find_min_book
 *  - find_form_factor
 */
39 | |
40 | #ifndef AVCODEC_AACCODER_TWOLOOP_H |
41 | #define AVCODEC_AACCODER_TWOLOOP_H |
42 | |
43 | #include <float.h> |
44 | #include "libavutil/mathematics.h" |
45 | #include "mathops.h" |
46 | #include "avcodec.h" |
47 | #include "put_bits.h" |
48 | #include "aac.h" |
49 | #include "aacenc.h" |
50 | #include "aactab.h" |
51 | #include "aacenctab.h" |
52 | |
53 | /** Frequency in Hz for lower limit of noise substitution **/ |
54 | #define NOISE_LOW_LIMIT 4000 |
55 | |
56 | #define sclip(x) av_clip(x,60,218) |
57 | |
58 | /* Reflects the cost to change codebooks */ |
59 | static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g) |
60 | { |
61 | return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5; |
62 | } |
63 | |
64 | /** |
65 | * two-loop quantizers search taken from ISO 13818-7 Appendix C |
66 | */ |
67 | static void search_for_quantizers_twoloop(AVCodecContext *avctx, |
68 | AACEncContext *s, |
69 | SingleChannelElement *sce, |
70 | const float lambda) |
71 | { |
72 | int start = 0, i, w, w2, g, recomprd; |
73 | int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate |
74 | / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) |
75 | * (lambda / 120.f); |
76 | int refbits = destbits; |
77 | int toomanybits, toofewbits; |
78 | char nzs[128]; |
79 | uint8_t nextband[128]; |
80 | int maxsf[128], minsf[128]; |
81 | float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128]; |
82 | float maxvals[128], spread_thr_r[128]; |
83 | float min_spread_thr_r, max_spread_thr_r; |
84 | |
85 | /** |
86 | * rdlambda controls the maximum tolerated distortion. Twoloop |
87 | * will keep iterating until it fails to lower it or it reaches |
88 | * ulimit * rdlambda. Keeping it low increases quality on difficult |
89 | * signals, but lower it too much, and bits will be taken from weak |
90 | * signals, creating "holes". A balance is necessary. |
91 | * rdmax and rdmin specify the relative deviation from rdlambda |
92 | * allowed for tonality compensation |
93 | */ |
94 | float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f); |
95 | const float nzslope = 1.5f; |
96 | float rdmin = 0.03125f; |
97 | float rdmax = 1.0f; |
98 | |
99 | /** |
100 | * sfoffs controls an offset of optmium allocation that will be |
101 | * applied based on lambda. Keep it real and modest, the loop |
102 | * will take care of the rest, this just accelerates convergence |
103 | */ |
104 | float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10); |
105 | |
106 | int fflag, minscaler, maxscaler, nminscaler; |
107 | int its = 0; |
108 | int maxits = 30; |
109 | int allz = 0; |
110 | int tbits; |
111 | int cutoff = 1024; |
112 | int pns_start_pos; |
113 | int prev; |
114 | |
115 | /** |
116 | * zeroscale controls a multiplier of the threshold, if band energy |
117 | * is below this, a zero is forced. Keep it lower than 1, unless |
118 | * low lambda is used, because energy < threshold doesn't mean there's |
119 | * no audible signal outright, it's just energy. Also make it rise |
120 | * slower than rdlambda, as rdscale has due compensation with |
121 | * noisy band depriorization below, whereas zeroing logic is rather dumb |
122 | */ |
123 | float zeroscale; |
124 | if (lambda > 120.f) { |
125 | zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f); |
126 | } else { |
127 | zeroscale = 1.f; |
128 | } |
129 | |
130 | if (s->psy.bitres.alloc >= 0) { |
131 | /** |
132 | * Psy granted us extra bits to use, from the reservoire |
133 | * adjust for lambda except what psy already did |
134 | */ |
135 | destbits = s->psy.bitres.alloc |
136 | * (lambda / (avctx->global_quality ? avctx->global_quality : 120)); |
137 | } |
138 | |
139 | if (avctx->flags & AV_CODEC_FLAG_QSCALE) { |
140 | /** |
141 | * Constant Q-scale doesn't compensate MS coding on its own |
142 | * No need to be overly precise, this only controls RD |
143 | * adjustment CB limits when going overboard |
144 | */ |
145 | if (s->options.mid_side && s->cur_type == TYPE_CPE) |
146 | destbits *= 2; |
147 | |
148 | /** |
149 | * When using a constant Q-scale, don't adjust bits, just use RD |
150 | * Don't let it go overboard, though... 8x psy target is enough |
151 | */ |
152 | toomanybits = 5800; |
153 | toofewbits = destbits / 16; |
154 | |
155 | /** Don't offset scalers, just RD */ |
156 | sfoffs = sce->ics.num_windows - 1; |
157 | rdlambda = sqrtf(rdlambda); |
158 | |
159 | /** search further */ |
160 | maxits *= 2; |
161 | } else { |
162 | /* When using ABR, be strict, but a reasonable leeway is |
163 | * critical to allow RC to smoothly track desired bitrate |
164 | * without sudden quality drops that cause audible artifacts. |
165 | * Symmetry is also desirable, to avoid systematic bias. |
166 | */ |
167 | toomanybits = destbits + destbits/8; |
168 | toofewbits = destbits - destbits/8; |
169 | |
170 | sfoffs = 0; |
171 | rdlambda = sqrtf(rdlambda); |
172 | } |
173 | |
174 | /** and zero out above cutoff frequency */ |
175 | { |
176 | int wlen = 1024 / sce->ics.num_windows; |
177 | int bandwidth; |
178 | |
179 | /** |
180 | * Scale, psy gives us constant quality, this LP only scales |
181 | * bitrate by lambda, so we save bits on subjectively unimportant HF |
182 | * rather than increase quantization noise. Adjust nominal bitrate |
183 | * to effective bitrate according to encoding parameters, |
184 | * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate. |
185 | */ |
186 | float rate_bandwidth_multiplier = 1.5f; |
187 | int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) |
188 | ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) |
189 | : (avctx->bit_rate / avctx->channels); |
190 | |
191 | /** Compensate for extensions that increase efficiency */ |
192 | if (s->options.pns || s->options.intensity_stereo) |
193 | frame_bit_rate *= 1.15f; |
194 | |
195 | if (avctx->cutoff > 0) { |
196 | bandwidth = avctx->cutoff; |
197 | } else { |
198 | bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); |
199 | s->psy.cutoff = bandwidth; |
200 | } |
201 | |
202 | cutoff = bandwidth * 2 * wlen / avctx->sample_rate; |
203 | pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate; |
204 | } |
205 | |
206 | /** |
207 | * for values above this the decoder might end up in an endless loop |
208 | * due to always having more bits than what can be encoded. |
209 | */ |
210 | destbits = FFMIN(destbits, 5800); |
211 | toomanybits = FFMIN(toomanybits, 5800); |
212 | toofewbits = FFMIN(toofewbits, 5800); |
213 | /** |
214 | * XXX: some heuristic to determine initial quantizers will reduce search time |
215 | * determine zero bands and upper distortion limits |
216 | */ |
217 | min_spread_thr_r = -1; |
218 | max_spread_thr_r = -1; |
219 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
220 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
221 | int nz = 0; |
222 | float uplim = 0.0f, energy = 0.0f, spread = 0.0f; |
223 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
224 | FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
225 | if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) { |
226 | sce->zeroes[(w+w2)*16+g] = 1; |
227 | continue; |
228 | } |
229 | nz = 1; |
230 | } |
231 | if (!nz) { |
232 | uplim = 0.0f; |
233 | } else { |
234 | nz = 0; |
235 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
236 | FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
237 | if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) |
238 | continue; |
239 | uplim += band->threshold; |
240 | energy += band->energy; |
241 | spread += band->spread; |
242 | nz++; |
243 | } |
244 | } |
245 | uplims[w*16+g] = uplim; |
246 | energies[w*16+g] = energy; |
247 | nzs[w*16+g] = nz; |
248 | sce->zeroes[w*16+g] = !nz; |
249 | allz |= nz; |
250 | if (nz && sce->can_pns[w*16+g]) { |
251 | spread_thr_r[w*16+g] = energy * nz / (uplim * spread); |
252 | if (min_spread_thr_r < 0) { |
253 | min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g]; |
254 | } else { |
255 | min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]); |
256 | max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]); |
257 | } |
258 | } |
259 | } |
260 | } |
261 | |
262 | /** Compute initial scalers */ |
263 | minscaler = 65535; |
264 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
265 | for (g = 0; g < sce->ics.num_swb; g++) { |
266 | if (sce->zeroes[w*16+g]) { |
267 | sce->sf_idx[w*16+g] = SCALE_ONE_POS; |
268 | continue; |
269 | } |
270 | /** |
271 | * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2). |
272 | * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion, |
273 | * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus |
274 | * more robust. |
275 | */ |
276 | sce->sf_idx[w*16+g] = av_clip( |
277 | SCALE_ONE_POS |
278 | + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g]) |
279 | + sfoffs, |
280 | 60, SCALE_MAX_POS); |
281 | minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); |
282 | } |
283 | } |
284 | |
285 | /** Clip */ |
286 | minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); |
287 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) |
288 | for (g = 0; g < sce->ics.num_swb; g++) |
289 | if (!sce->zeroes[w*16+g]) |
290 | sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1); |
291 | |
292 | if (!allz) |
293 | return; |
294 | s->abs_pow34(s->scoefs, sce->coeffs, 1024); |
295 | ff_quantize_band_cost_cache_init(s); |
296 | |
297 | for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i) |
298 | minsf[i] = 0; |
299 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
300 | start = w*128; |
301 | for (g = 0; g < sce->ics.num_swb; g++) { |
302 | const float *scaled = s->scoefs + start; |
303 | int minsfidx; |
304 | maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled); |
305 | if (maxvals[w*16+g] > 0) { |
306 | minsfidx = coef2minsf(maxvals[w*16+g]); |
307 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) |
308 | minsf[(w+w2)*16+g] = minsfidx; |
309 | } |
310 | start += sce->ics.swb_sizes[g]; |
311 | } |
312 | } |
313 | |
314 | /** |
315 | * Scale uplims to match rate distortion to quality |
316 | * bu applying noisy band depriorization and tonal band priorization. |
317 | * Maxval-energy ratio gives us an idea of how noisy/tonal the band is. |
318 | * If maxval^2 ~ energy, then that band is mostly noise, and we can relax |
319 | * rate distortion requirements. |
320 | */ |
321 | memcpy(euplims, uplims, sizeof(euplims)); |
322 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
323 | /** psy already priorizes transients to some extent */ |
324 | float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f; |
325 | start = w*128; |
326 | for (g = 0; g < sce->ics.num_swb; g++) { |
327 | if (nzs[g] > 0) { |
328 | float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f)); |
329 | float energy2uplim = find_form_factor( |
330 | sce->ics.group_len[w], sce->ics.swb_sizes[g], |
331 | uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]), |
332 | sce->coeffs + start, |
333 | nzslope * cleanup_factor); |
334 | energy2uplim *= de_psy_factor; |
335 | if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) { |
336 | /** In ABR, we need to priorize less and let rate control do its thing */ |
337 | energy2uplim = sqrtf(energy2uplim); |
338 | } |
339 | energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); |
340 | uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax) |
341 | * sce->ics.group_len[w]; |
342 | |
343 | energy2uplim = find_form_factor( |
344 | sce->ics.group_len[w], sce->ics.swb_sizes[g], |
345 | uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]), |
346 | sce->coeffs + start, |
347 | 2.0f); |
348 | energy2uplim *= de_psy_factor; |
349 | if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) { |
350 | /** In ABR, we need to priorize less and let rate control do its thing */ |
351 | energy2uplim = sqrtf(energy2uplim); |
352 | } |
353 | energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); |
354 | euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w], |
355 | 0.5f, 1.0f); |
356 | } |
357 | start += sce->ics.swb_sizes[g]; |
358 | } |
359 | } |
360 | |
361 | for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i) |
362 | maxsf[i] = SCALE_MAX_POS; |
363 | |
364 | //perform two-loop search |
365 | //outer loop - improve quality |
366 | do { |
367 | //inner loop - quantize spectrum to fit into given number of bits |
368 | int overdist; |
369 | int qstep = its ? 1 : 32; |
370 | do { |
371 | int changed = 0; |
372 | prev = -1; |
373 | recomprd = 0; |
374 | tbits = 0; |
375 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
376 | start = w*128; |
377 | for (g = 0; g < sce->ics.num_swb; g++) { |
378 | const float *coefs = &sce->coeffs[start]; |
379 | const float *scaled = &s->scoefs[start]; |
380 | int bits = 0; |
381 | int cb; |
382 | float dist = 0.0f; |
383 | float qenergy = 0.0f; |
384 | |
385 | if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { |
386 | start += sce->ics.swb_sizes[g]; |
387 | if (sce->can_pns[w*16+g]) { |
388 | /** PNS isn't free */ |
389 | tbits += ff_pns_bits(sce, w, g); |
390 | } |
391 | continue; |
392 | } |
393 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
394 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
395 | int b; |
396 | float sqenergy; |
397 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
398 | scaled + w2*128, |
399 | sce->ics.swb_sizes[g], |
400 | sce->sf_idx[w*16+g], |
401 | cb, |
402 | 1.0f, |
403 | INFINITY, |
404 | &b, &sqenergy, |
405 | 0); |
406 | bits += b; |
407 | qenergy += sqenergy; |
408 | } |
409 | dists[w*16+g] = dist - bits; |
410 | qenergies[w*16+g] = qenergy; |
411 | if (prev != -1) { |
412 | int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF); |
413 | bits += ff_aac_scalefactor_bits[sfdiff]; |
414 | } |
415 | tbits += bits; |
416 | start += sce->ics.swb_sizes[g]; |
417 | prev = sce->sf_idx[w*16+g]; |
418 | } |
419 | } |
420 | if (tbits > toomanybits) { |
421 | recomprd = 1; |
422 | for (i = 0; i < 128; i++) { |
423 | if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) { |
424 | int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i]; |
425 | int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep); |
426 | if (new_sf != sce->sf_idx[i]) { |
427 | sce->sf_idx[i] = new_sf; |
428 | changed = 1; |
429 | } |
430 | } |
431 | } |
432 | } else if (tbits < toofewbits) { |
433 | recomprd = 1; |
434 | for (i = 0; i < 128; i++) { |
435 | if (sce->sf_idx[i] > SCALE_ONE_POS) { |
436 | int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep); |
437 | if (new_sf != sce->sf_idx[i]) { |
438 | sce->sf_idx[i] = new_sf; |
439 | changed = 1; |
440 | } |
441 | } |
442 | } |
443 | } |
444 | qstep >>= 1; |
445 | if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed) |
446 | qstep = 1; |
447 | } while (qstep); |
448 | |
449 | overdist = 1; |
450 | fflag = tbits < toofewbits; |
451 | for (i = 0; i < 2 && (overdist || recomprd); ++i) { |
452 | if (recomprd) { |
453 | /** Must recompute distortion */ |
454 | prev = -1; |
455 | tbits = 0; |
456 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
457 | start = w*128; |
458 | for (g = 0; g < sce->ics.num_swb; g++) { |
459 | const float *coefs = sce->coeffs + start; |
460 | const float *scaled = s->scoefs + start; |
461 | int bits = 0; |
462 | int cb; |
463 | float dist = 0.0f; |
464 | float qenergy = 0.0f; |
465 | |
466 | if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { |
467 | start += sce->ics.swb_sizes[g]; |
468 | if (sce->can_pns[w*16+g]) { |
469 | /** PNS isn't free */ |
470 | tbits += ff_pns_bits(sce, w, g); |
471 | } |
472 | continue; |
473 | } |
474 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
475 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
476 | int b; |
477 | float sqenergy; |
478 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
479 | scaled + w2*128, |
480 | sce->ics.swb_sizes[g], |
481 | sce->sf_idx[w*16+g], |
482 | cb, |
483 | 1.0f, |
484 | INFINITY, |
485 | &b, &sqenergy, |
486 | 0); |
487 | bits += b; |
488 | qenergy += sqenergy; |
489 | } |
490 | dists[w*16+g] = dist - bits; |
491 | qenergies[w*16+g] = qenergy; |
492 | if (prev != -1) { |
493 | int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF); |
494 | bits += ff_aac_scalefactor_bits[sfdiff]; |
495 | } |
496 | tbits += bits; |
497 | start += sce->ics.swb_sizes[g]; |
498 | prev = sce->sf_idx[w*16+g]; |
499 | } |
500 | } |
501 | } |
502 | if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) { |
503 | float maxoverdist = 0.0f; |
504 | float ovrfactor = 1.f+(maxits-its)*16.f/maxits; |
505 | overdist = recomprd = 0; |
506 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
507 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
508 | if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) { |
509 | float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]); |
510 | maxoverdist = FFMAX(maxoverdist, ovrdist); |
511 | overdist++; |
512 | } |
513 | } |
514 | } |
515 | if (overdist) { |
516 | /* We have overdistorted bands, trade for zeroes (that can be noise) |
517 | * Zero the bands in the lowest 1.25% spread-energy-threshold ranking |
518 | */ |
519 | float minspread = max_spread_thr_r; |
520 | float maxspread = min_spread_thr_r; |
521 | float zspread; |
522 | int zeroable = 0; |
523 | int zeroed = 0; |
524 | int maxzeroed, zloop; |
525 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
526 | for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { |
527 | if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) { |
528 | minspread = FFMIN(minspread, spread_thr_r[w*16+g]); |
529 | maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]); |
530 | zeroable++; |
531 | } |
532 | } |
533 | } |
534 | zspread = (maxspread-minspread) * 0.0125f + minspread; |
535 | /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC, |
536 | * and forced the hand of the later search_for_pns step. |
537 | * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are, |
538 | * and leave further PNSing to search_for_pns if worthwhile. |
539 | */ |
540 | zspread = FFMIN3(min_spread_thr_r * 8.f, zspread, |
541 | ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1)); |
542 | maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits))); |
543 | for (zloop = 0; zloop < 2; zloop++) { |
544 | /* Two passes: first distorted stuff - two birds in one shot and all that, |
545 | * then anything viable. Viable means not zero, but either CB=zero-able |
546 | * (too high SF), not SF <= 1 (that means we'd be operating at very high |
547 | * quality, we don't want PNS when doing VHQ), PNS allowed, and within |
548 | * the lowest ranking percentile. |
549 | */ |
550 | float loopovrfactor = (zloop) ? 1.0f : ovrfactor; |
551 | int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS; |
552 | int mcb; |
553 | for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) { |
554 | if (sce->ics.swb_offset[g] < pns_start_pos) |
555 | continue; |
556 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
557 | if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread |
558 | && sce->sf_idx[w*16+g] > loopminsf |
559 | && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g])) |
560 | || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) { |
561 | sce->zeroes[w*16+g] = 1; |
562 | sce->band_type[w*16+g] = 0; |
563 | zeroed++; |
564 | } |
565 | } |
566 | } |
567 | } |
568 | if (zeroed) |
569 | recomprd = fflag = 1; |
570 | } else { |
571 | overdist = 0; |
572 | } |
573 | } |
574 | } |
575 | |
576 | minscaler = SCALE_MAX_POS; |
577 | maxscaler = 0; |
578 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
579 | for (g = 0; g < sce->ics.num_swb; g++) { |
580 | if (!sce->zeroes[w*16+g]) { |
581 | minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); |
582 | maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]); |
583 | } |
584 | } |
585 | } |
586 | |
587 | minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); |
588 | prev = -1; |
589 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
590 | /** Start with big steps, end up fine-tunning */ |
591 | int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10; |
592 | int edepth = depth+2; |
593 | float uplmax = its / (maxits*0.25f) + 1.0f; |
594 | uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f; |
595 | start = w * 128; |
596 | for (g = 0; g < sce->ics.num_swb; g++) { |
597 | int prevsc = sce->sf_idx[w*16+g]; |
598 | if (prev < 0 && !sce->zeroes[w*16+g]) |
599 | prev = sce->sf_idx[0]; |
600 | if (!sce->zeroes[w*16+g]) { |
601 | const float *coefs = sce->coeffs + start; |
602 | const float *scaled = s->scoefs + start; |
603 | int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
604 | int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF); |
605 | int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF); |
606 | if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) { |
607 | /* Try to make sure there is some energy in every nonzero band |
608 | * NOTE: This algorithm must be forcibly imbalanced, pushing harder |
609 | * on holes or more distorted bands at first, otherwise there's |
610 | * no net gain (since the next iteration will offset all bands |
611 | * on the opposite direction to compensate for extra bits) |
612 | */ |
613 | for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) { |
614 | int cb, bits; |
615 | float dist, qenergy; |
616 | int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1); |
617 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
618 | dist = qenergy = 0.f; |
619 | bits = 0; |
620 | if (!cb) { |
621 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]); |
622 | } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) { |
623 | break; |
624 | } |
625 | /* !g is the DC band, it's important, since quantization error here |
626 | * applies to less than a cycle, it creates horrible intermodulation |
627 | * distortion if it doesn't stick to what psy requests |
628 | */ |
629 | if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g]) |
630 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]); |
631 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
632 | int b; |
633 | float sqenergy; |
634 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
635 | scaled + w2*128, |
636 | sce->ics.swb_sizes[g], |
637 | sce->sf_idx[w*16+g]-1, |
638 | cb, |
639 | 1.0f, |
640 | INFINITY, |
641 | &b, &sqenergy, |
642 | 0); |
643 | bits += b; |
644 | qenergy += sqenergy; |
645 | } |
646 | sce->sf_idx[w*16+g]--; |
647 | dists[w*16+g] = dist - bits; |
648 | qenergies[w*16+g] = qenergy; |
649 | if (mb && (sce->sf_idx[w*16+g] < mindeltasf || ( |
650 | (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g])) |
651 | && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) |
652 | ) )) { |
653 | break; |
654 | } |
655 | } |
656 | } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g]) |
657 | && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g])) |
658 | && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) |
659 | ) { |
660 | /** Um... over target. Save bits for more important stuff. */ |
661 | for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) { |
662 | int cb, bits; |
663 | float dist, qenergy; |
664 | cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1); |
665 | if (cb > 0) { |
666 | dist = qenergy = 0.f; |
667 | bits = 0; |
668 | for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { |
669 | int b; |
670 | float sqenergy; |
671 | dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128, |
672 | scaled + w2*128, |
673 | sce->ics.swb_sizes[g], |
674 | sce->sf_idx[w*16+g]+1, |
675 | cb, |
676 | 1.0f, |
677 | INFINITY, |
678 | &b, &sqenergy, |
679 | 0); |
680 | bits += b; |
681 | qenergy += sqenergy; |
682 | } |
683 | dist -= bits; |
684 | if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) { |
685 | sce->sf_idx[w*16+g]++; |
686 | dists[w*16+g] = dist; |
687 | qenergies[w*16+g] = qenergy; |
688 | } else { |
689 | break; |
690 | } |
691 | } else { |
692 | maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]); |
693 | break; |
694 | } |
695 | } |
696 | } |
697 | prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf); |
698 | if (sce->sf_idx[w*16+g] != prevsc) |
699 | fflag = 1; |
700 | nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]); |
701 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
702 | } |
703 | start += sce->ics.swb_sizes[g]; |
704 | } |
705 | } |
706 | |
707 | /** SF difference limit violation risk. Must re-clamp. */ |
708 | prev = -1; |
709 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
710 | for (g = 0; g < sce->ics.num_swb; g++) { |
711 | if (!sce->zeroes[w*16+g]) { |
712 | int prevsf = sce->sf_idx[w*16+g]; |
713 | if (prev < 0) |
714 | prev = prevsf; |
715 | sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF); |
716 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
717 | prev = sce->sf_idx[w*16+g]; |
718 | if (!fflag && prevsf != sce->sf_idx[w*16+g]) |
719 | fflag = 1; |
720 | } |
721 | } |
722 | } |
723 | |
724 | its++; |
725 | } while (fflag && its < maxits); |
726 | |
727 | /** Scout out next nonzero bands */ |
728 | ff_init_nextband_map(sce, nextband); |
729 | |
730 | prev = -1; |
731 | for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
732 | /** Make sure proper codebooks are set */ |
733 | for (g = 0; g < sce->ics.num_swb; g++) { |
734 | if (!sce->zeroes[w*16+g]) { |
735 | sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); |
736 | if (sce->band_type[w*16+g] <= 0) { |
737 | if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) { |
738 | /** Cannot zero out, make sure it's not attempted */ |
739 | sce->band_type[w*16+g] = 1; |
740 | } else { |
741 | sce->zeroes[w*16+g] = 1; |
742 | sce->band_type[w*16+g] = 0; |
743 | } |
744 | } |
745 | } else { |
746 | sce->band_type[w*16+g] = 0; |
747 | } |
748 | /** Check that there's no SF delta range violations */ |
749 | if (!sce->zeroes[w*16+g]) { |
750 | if (prev != -1) { |
751 | av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO; |
752 | av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF); |
753 | } else if (sce->zeroes[0]) { |
754 | /** Set global gain to something useful */ |
755 | sce->sf_idx[0] = sce->sf_idx[w*16+g]; |
756 | } |
757 | prev = sce->sf_idx[w*16+g]; |
758 | } |
759 | } |
760 | } |
761 | } |
762 | |
763 | #endif /* AVCODEC_AACCODER_TWOLOOP_H */ |
764 |