summaryrefslogtreecommitdiff
path: root/libavcodec/g722enc.c (plain)
blob: 01a3db26fd609e063a413fb0fb00c1a995373252
1/*
2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
7 *
8 * This file is part of FFmpeg.
9 *
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25/**
26 * @file
27 * G.722 ADPCM audio encoder
28 */
29
30#include "libavutil/avassert.h"
31#include "avcodec.h"
32#include "internal.h"
33#include "g722.h"
34#include "libavutil/common.h"
35
/* Number of trellis steps between two forced path commits; the trellis
 * path buffers are allocated with frontier * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* Arbitrary upper bound for the frame size. Insanely large values lead to
 * strange problems, so the frame size is capped at a reasonable value. */
#define MAX_FRAME_SIZE 32768

/* avctx->trellis is clipped to this range to prevent data type overflows
 * and undefined behavior. Larger values would be insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
46
47static av_cold int g722_encode_close(AVCodecContext *avctx)
48{
49 G722Context *c = avctx->priv_data;
50 int i;
51 for (i = 0; i < 2; i++) {
52 av_freep(&c->paths[i]);
53 av_freep(&c->node_buf[i]);
54 av_freep(&c->nodep_buf[i]);
55 }
56 return 0;
57}
58
59static av_cold int g722_encode_init(AVCodecContext * avctx)
60{
61 G722Context *c = avctx->priv_data;
62 int ret;
63
64 if (avctx->channels != 1) {
65 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66 return AVERROR_INVALIDDATA;
67 }
68
69 c->band[0].scale_factor = 8;
70 c->band[1].scale_factor = 2;
71 c->prev_samples_pos = 22;
72
73 if (avctx->trellis) {
74 int frontier = 1 << avctx->trellis;
75 int max_paths = frontier * FREEZE_INTERVAL;
76 int i;
77 for (i = 0; i < 2; i++) {
78 c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
79 c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
80 c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
81 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82 ret = AVERROR(ENOMEM);
83 goto error;
84 }
85 }
86 }
87
88 if (avctx->frame_size) {
89 /* validate frame size */
90 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
91 int new_frame_size;
92
93 if (avctx->frame_size == 1)
94 new_frame_size = 2;
95 else if (avctx->frame_size > MAX_FRAME_SIZE)
96 new_frame_size = MAX_FRAME_SIZE;
97 else
98 new_frame_size = avctx->frame_size - 1;
99
100 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101 "allowed. Using %d instead of %d\n", new_frame_size,
102 avctx->frame_size);
103 avctx->frame_size = new_frame_size;
104 }
105 } else {
106 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107 a common packet size for VoIP applications */
108 avctx->frame_size = 320;
109 }
110 avctx->initial_padding = 22;
111
112 if (avctx->trellis) {
113 /* validate trellis */
114 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117 "allowed. Using %d instead of %d\n", new_trellis,
118 avctx->trellis);
119 avctx->trellis = new_trellis;
120 }
121 }
122
123 ff_g722dsp_init(&c->dsp);
124
125 return 0;
126error:
127 g722_encode_close(avctx);
128 return ret;
129}
130
/*
 * Decision thresholds consulted by encode_low(), which scales them by the
 * band's scale_factor. Only the first 29 entries are given explicitly; the
 * remaining elements of the 33-entry array are implicitly zero.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
137
138static inline void filter_samples(G722Context *c, const int16_t *samples,
139 int *xlow, int *xhigh)
140{
141 int xout[2];
142 c->prev_samples[c->prev_samples_pos++] = samples[0];
143 c->prev_samples[c->prev_samples_pos++] = samples[1];
144 c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
145 *xlow = xout[0] + xout[1] >> 14;
146 *xhigh = xout[0] - xout[1] >> 14;
147 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
148 memmove(c->prev_samples,
149 c->prev_samples + c->prev_samples_pos - 22,
150 22 * sizeof(c->prev_samples[0]));
151 c->prev_samples_pos = 22;
152 }
153}
154
155static inline int encode_high(const struct G722Band *state, int xhigh)
156{
157 int diff = av_clip_int16(xhigh - state->s_predictor);
158 int pred = 141 * state->scale_factor >> 8;
159 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
160 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
161}
162
163static inline int encode_low(const struct G722Band* state, int xlow)
164{
165 int diff = av_clip_int16(xlow - state->s_predictor);
166 /* = diff >= 0 ? diff : -(diff + 1) */
167 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
168 int i = 0;
169 limit = limit + 1 << 10;
170 if (limit > low_quant[8] * state->scale_factor)
171 i = 9;
172 while (i < 29 && limit > low_quant[i] * state->scale_factor)
173 i++;
174 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
175}
176
177static void g722_encode_trellis(G722Context *c, int trellis,
178 uint8_t *dst, int nb_samples,
179 const int16_t *samples)
180{
181 int i, j, k;
182 int frontier = 1 << trellis;
183 struct TrellisNode **nodes[2];
184 struct TrellisNode **nodes_next[2];
185 int pathn[2] = {0, 0}, froze = -1;
186 struct TrellisPath *p[2];
187
188 for (i = 0; i < 2; i++) {
189 nodes[i] = c->nodep_buf[i];
190 nodes_next[i] = c->nodep_buf[i] + frontier;
191 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
192 nodes[i][0] = c->node_buf[i] + frontier;
193 nodes[i][0]->ssd = 0;
194 nodes[i][0]->path = 0;
195 nodes[i][0]->state = c->band[i];
196 }
197
198 for (i = 0; i < nb_samples >> 1; i++) {
199 int xlow, xhigh;
200 struct TrellisNode *next[2];
201 int heap_pos[2] = {0, 0};
202
203 for (j = 0; j < 2; j++) {
204 next[j] = c->node_buf[j] + frontier*(i & 1);
205 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
206 }
207
208 filter_samples(c, &samples[2*i], &xlow, &xhigh);
209
210 for (j = 0; j < frontier && nodes[0][j]; j++) {
211 /* Only k >> 2 affects the future adaptive state, therefore testing
212 * small steps that don't change k >> 2 is useless, the original
213 * value from encode_low is better than them. Since we step k
214 * in steps of 4, make sure range is a multiple of 4, so that
215 * we don't miss the original value from encode_low. */
216 int range = j < frontier/2 ? 4 : 0;
217 struct TrellisNode *cur_node = nodes[0][j];
218
219 int ilow = encode_low(&cur_node->state, xlow);
220
221 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
222 int decoded, dec_diff, pos;
223 uint32_t ssd;
224 struct TrellisNode* node;
225
226 if (k < 0)
227 continue;
228
229 decoded = av_clip_intp2((cur_node->state.scale_factor *
230 ff_g722_low_inv_quant6[k] >> 10)
231 + cur_node->state.s_predictor, 14);
232 dec_diff = xlow - decoded;
233
234#define STORE_NODE(index, UPDATE, VALUE)\
235 ssd = cur_node->ssd + dec_diff*dec_diff;\
236 /* Check for wraparound. Using 64 bit ssd counters would \
237 * be simpler, but is slower on x86 32 bit. */\
238 if (ssd < cur_node->ssd)\
239 continue;\
240 if (heap_pos[index] < frontier) {\
241 pos = heap_pos[index]++;\
242 av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
243 node = nodes_next[index][pos] = next[index]++;\
244 node->path = pathn[index]++;\
245 } else {\
246 /* Try to replace one of the leaf nodes with the new \
247 * one, but not always testing the same leaf position */\
248 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
249 if (ssd >= nodes_next[index][pos]->ssd)\
250 continue;\
251 heap_pos[index]++;\
252 node = nodes_next[index][pos];\
253 }\
254 node->ssd = ssd;\
255 node->state = cur_node->state;\
256 UPDATE;\
257 c->paths[index][node->path].value = VALUE;\
258 c->paths[index][node->path].prev = cur_node->path;\
259 /* Sift the newly inserted node up in the heap to restore \
260 * the heap property */\
261 while (pos > 0) {\
262 int parent = (pos - 1) >> 1;\
263 if (nodes_next[index][parent]->ssd <= ssd)\
264 break;\
265 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
266 nodes_next[index][pos]);\
267 pos = parent;\
268 }
269 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
270 }
271 }
272
273 for (j = 0; j < frontier && nodes[1][j]; j++) {
274 int ihigh;
275 struct TrellisNode *cur_node = nodes[1][j];
276
277 /* We don't try to get any initial guess for ihigh via
278 * encode_high - since there's only 4 possible values, test
279 * them all. Testing all of these gives a much, much larger
280 * gain than testing a larger range around ilow. */
281 for (ihigh = 0; ihigh < 4; ihigh++) {
282 int dhigh, decoded, dec_diff, pos;
283 uint32_t ssd;
284 struct TrellisNode* node;
285
286 dhigh = cur_node->state.scale_factor *
287 ff_g722_high_inv_quant[ihigh] >> 10;
288 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
289 dec_diff = xhigh - decoded;
290
291 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
292 }
293 }
294
295 for (j = 0; j < 2; j++) {
296 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
297
298 if (nodes[j][0]->ssd > (1 << 16)) {
299 for (k = 1; k < frontier && nodes[j][k]; k++)
300 nodes[j][k]->ssd -= nodes[j][0]->ssd;
301 nodes[j][0]->ssd = 0;
302 }
303 }
304
305 if (i == froze + FREEZE_INTERVAL) {
306 p[0] = &c->paths[0][nodes[0][0]->path];
307 p[1] = &c->paths[1][nodes[1][0]->path];
308 for (j = i; j > froze; j--) {
309 dst[j] = p[1]->value << 6 | p[0]->value;
310 p[0] = &c->paths[0][p[0]->prev];
311 p[1] = &c->paths[1][p[1]->prev];
312 }
313 froze = i;
314 pathn[0] = pathn[1] = 0;
315 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
316 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
317 }
318 }
319
320 p[0] = &c->paths[0][nodes[0][0]->path];
321 p[1] = &c->paths[1][nodes[1][0]->path];
322 for (j = i; j > froze; j--) {
323 dst[j] = p[1]->value << 6 | p[0]->value;
324 p[0] = &c->paths[0][p[0]->prev];
325 p[1] = &c->paths[1][p[1]->prev];
326 }
327 c->band[0] = nodes[0][0]->state;
328 c->band[1] = nodes[1][0]->state;
329}
330
331static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
332 const int16_t *samples)
333{
334 int xlow, xhigh, ilow, ihigh;
335 filter_samples(c, samples, &xlow, &xhigh);
336 ihigh = encode_high(&c->band[1], xhigh);
337 ilow = encode_low (&c->band[0], xlow);
338 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
339 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
340 ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
341 *dst = ihigh << 6 | ilow;
342}
343
344static void g722_encode_no_trellis(G722Context *c,
345 uint8_t *dst, int nb_samples,
346 const int16_t *samples)
347{
348 int i;
349 for (i = 0; i < nb_samples; i += 2)
350 encode_byte(c, dst++, &samples[i]);
351}
352
353static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
354 const AVFrame *frame, int *got_packet_ptr)
355{
356 G722Context *c = avctx->priv_data;
357 const int16_t *samples = (const int16_t *)frame->data[0];
358 int nb_samples, out_size, ret;
359
360 out_size = (frame->nb_samples + 1) / 2;
361 if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
362 return ret;
363
364 nb_samples = frame->nb_samples - (frame->nb_samples & 1);
365
366 if (avctx->trellis)
367 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
368 else
369 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
370
371 /* handle last frame with odd frame_size */
372 if (nb_samples < frame->nb_samples) {
373 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
374 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
375 }
376
377 if (frame->pts != AV_NOPTS_VALUE)
378 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
379 *got_packet_ptr = 1;
380 return 0;
381}
382
383AVCodec ff_adpcm_g722_encoder = {
384 .name = "g722",
385 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
386 .type = AVMEDIA_TYPE_AUDIO,
387 .id = AV_CODEC_ID_ADPCM_G722,
388 .priv_data_size = sizeof(G722Context),
389 .init = g722_encode_init,
390 .close = g722_encode_close,
391 .encode2 = g722_encode_frame,
392 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
393 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
394 AV_SAMPLE_FMT_NONE },
395};
396