blob: 090dfa5c0a04f6f26019c1f5af982df9dae56f5f
1 | /* |
2 | * Apple ProRes encoder |
3 | * |
4 | * Copyright (c) 2012 Konstantin Shishkov |
5 | * |
6 | * This encoder appears to be based on Anatoliy Wasserman's encoder, |
7 | * considering the similarities in the bugs. |
8 | * |
9 | * This file is part of FFmpeg. |
10 | * |
11 | * FFmpeg is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public |
13 | * License as published by the Free Software Foundation; either |
14 | * version 2.1 of the License, or (at your option) any later version. |
15 | * |
16 | * FFmpeg is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with FFmpeg; if not, write to the Free Software |
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
24 | */ |
25 | |
26 | #include "libavutil/opt.h" |
27 | #include "libavutil/pixdesc.h" |
28 | #include "avcodec.h" |
29 | #include "fdctdsp.h" |
30 | #include "put_bits.h" |
31 | #include "bytestream.h" |
32 | #include "internal.h" |
33 | #include "proresdata.h" |
34 | |
/* Chroma format codes; the value is also written into the frame flags. */
#define CFACTOR_Y422        2   /* chroma planes are half the luma width */
#define CFACTOR_Y444        3   /* chroma planes have the full luma width */

/* Largest number of macroblocks a single slice may span. */
#define MAX_MBS_PER_SLICE   8

/* Upper bound on coded planes (three colour planes plus optional alpha). */
#define MAX_PLANES          4
41 | |
/*
 * Encoder profiles. The non-negative values match the entry order of the
 * prores_profile_info[] table.
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
};
50 | |
/* Row selectors for the prores_quant_matrices[] table. */
enum {
    QUANT_MAT_PROXY    = 0,
    QUANT_MAT_LT       = 1,
    QUANT_MAT_STANDARD = 2,
    QUANT_MAT_HQ       = 3,
    QUANT_MAT_DEFAULT  = 4,
};
58 | |
59 | static const uint8_t prores_quant_matrices[][64] = { |
60 | { // proxy |
61 | 4, 7, 9, 11, 13, 14, 15, 63, |
62 | 7, 7, 11, 12, 14, 15, 63, 63, |
63 | 9, 11, 13, 14, 15, 63, 63, 63, |
64 | 11, 11, 13, 14, 63, 63, 63, 63, |
65 | 11, 13, 14, 63, 63, 63, 63, 63, |
66 | 13, 14, 63, 63, 63, 63, 63, 63, |
67 | 13, 63, 63, 63, 63, 63, 63, 63, |
68 | 63, 63, 63, 63, 63, 63, 63, 63, |
69 | }, |
70 | { // LT |
71 | 4, 5, 6, 7, 9, 11, 13, 15, |
72 | 5, 5, 7, 8, 11, 13, 15, 17, |
73 | 6, 7, 9, 11, 13, 15, 15, 17, |
74 | 7, 7, 9, 11, 13, 15, 17, 19, |
75 | 7, 9, 11, 13, 14, 16, 19, 23, |
76 | 9, 11, 13, 14, 16, 19, 23, 29, |
77 | 9, 11, 13, 15, 17, 21, 28, 35, |
78 | 11, 13, 16, 17, 21, 28, 35, 41, |
79 | }, |
80 | { // standard |
81 | 4, 4, 5, 5, 6, 7, 7, 9, |
82 | 4, 4, 5, 6, 7, 7, 9, 9, |
83 | 5, 5, 6, 7, 7, 9, 9, 10, |
84 | 5, 5, 6, 7, 7, 9, 9, 10, |
85 | 5, 6, 7, 7, 8, 9, 10, 12, |
86 | 6, 7, 7, 8, 9, 10, 12, 15, |
87 | 6, 7, 7, 9, 10, 11, 14, 17, |
88 | 7, 7, 9, 10, 11, 14, 17, 21, |
89 | }, |
90 | { // high quality |
91 | 4, 4, 4, 4, 4, 4, 4, 4, |
92 | 4, 4, 4, 4, 4, 4, 4, 4, |
93 | 4, 4, 4, 4, 4, 4, 4, 4, |
94 | 4, 4, 4, 4, 4, 4, 4, 5, |
95 | 4, 4, 4, 4, 4, 4, 5, 5, |
96 | 4, 4, 4, 4, 4, 5, 5, 6, |
97 | 4, 4, 4, 4, 5, 5, 6, 7, |
98 | 4, 4, 4, 4, 5, 6, 7, 7, |
99 | }, |
100 | { // codec default |
101 | 4, 4, 4, 4, 4, 4, 4, 4, |
102 | 4, 4, 4, 4, 4, 4, 4, 4, |
103 | 4, 4, 4, 4, 4, 4, 4, 4, |
104 | 4, 4, 4, 4, 4, 4, 4, 4, |
105 | 4, 4, 4, 4, 4, 4, 4, 4, |
106 | 4, 4, 4, 4, 4, 4, 4, 4, |
107 | 4, 4, 4, 4, 4, 4, 4, 4, |
108 | 4, 4, 4, 4, 4, 4, 4, 4, |
109 | }, |
110 | }; |
111 | |
/* Number of frame-size classes in the tables below. */
#define NUM_MB_LIMITS 4

/*
 * Macroblock-count thresholds of the frame-size classes, in ascending order;
 * each entry pairs with the same index of a profile's br_tab[].
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    [0] = 1620, /* up to 720x576 */
    [1] = 2700, /* up to 960x720 */
    [2] = 6075, /* up to 1440x1080 */
    [3] = 9216, /* up to 2048x1152 */
};
119 | |
120 | static const struct prores_profile { |
121 | const char *full_name; |
122 | uint32_t tag; |
123 | int min_quant; |
124 | int max_quant; |
125 | int br_tab[NUM_MB_LIMITS]; |
126 | int quant; |
127 | } prores_profile_info[5] = { |
128 | { |
129 | .full_name = "proxy", |
130 | .tag = MKTAG('a', 'p', 'c', 'o'), |
131 | .min_quant = 4, |
132 | .max_quant = 8, |
133 | .br_tab = { 300, 242, 220, 194 }, |
134 | .quant = QUANT_MAT_PROXY, |
135 | }, |
136 | { |
137 | .full_name = "LT", |
138 | .tag = MKTAG('a', 'p', 'c', 's'), |
139 | .min_quant = 1, |
140 | .max_quant = 9, |
141 | .br_tab = { 720, 560, 490, 440 }, |
142 | .quant = QUANT_MAT_LT, |
143 | }, |
144 | { |
145 | .full_name = "standard", |
146 | .tag = MKTAG('a', 'p', 'c', 'n'), |
147 | .min_quant = 1, |
148 | .max_quant = 6, |
149 | .br_tab = { 1050, 808, 710, 632 }, |
150 | .quant = QUANT_MAT_STANDARD, |
151 | }, |
152 | { |
153 | .full_name = "high quality", |
154 | .tag = MKTAG('a', 'p', 'c', 'h'), |
155 | .min_quant = 1, |
156 | .max_quant = 6, |
157 | .br_tab = { 1566, 1216, 1070, 950 }, |
158 | .quant = QUANT_MAT_HQ, |
159 | }, |
160 | { |
161 | .full_name = "4444", |
162 | .tag = MKTAG('a', 'p', '4', 'h'), |
163 | .min_quant = 1, |
164 | .max_quant = 6, |
165 | .br_tab = { 2350, 1828, 1600, 1425 }, |
166 | .quant = QUANT_MAT_HQ, |
167 | } |
168 | }; |
169 | |
/* Stride, in nodes, between consecutive slice columns of the quantiser trellis. */
#define TRELLIS_WIDTH 16
/*
 * Score marking a trellis path as unusable. Parenthesised so the macro
 * behaves as a single operand inside larger expressions.
 */
#define SCORE_LIMIT   (INT_MAX / 2)
172 | |
/* One node of the per-row quantiser trellis. */
struct TrellisNode {
    int prev_node;  /* index of the best predecessor node, -1 if not linked yet */
    int quant;      /* quantiser this node stands for */
    int bits;       /* bits accumulated along the best path ending here */
    int score;      /* error accumulated along that path */
};
179 | |
180 | #define MAX_STORED_Q 16 |
181 | |
182 | typedef struct ProresThreadData { |
183 | DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; |
184 | DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16]; |
185 | int16_t custom_q[64]; |
186 | struct TrellisNode *nodes; |
187 | } ProresThreadData; |
188 | |
189 | typedef struct ProresContext { |
190 | AVClass *class; |
191 | DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; |
192 | DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; |
193 | int16_t quants[MAX_STORED_Q][64]; |
194 | int16_t custom_q[64]; |
195 | const uint8_t *quant_mat; |
196 | const uint8_t *scantable; |
197 | |
198 | void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src, |
199 | ptrdiff_t linesize, int16_t *block); |
200 | FDCTDSPContext fdsp; |
201 | |
202 | const AVFrame *pic; |
203 | int mb_width, mb_height; |
204 | int mbs_per_slice; |
205 | int num_chroma_blocks, chroma_factor; |
206 | int slices_width; |
207 | int slices_per_picture; |
208 | int pictures_per_frame; // 1 for progressive, 2 for interlaced |
209 | int cur_picture_idx; |
210 | int num_planes; |
211 | int bits_per_mb; |
212 | int force_quant; |
213 | int alpha_bits; |
214 | int warn; |
215 | |
216 | char *vendor; |
217 | int quant_sel; |
218 | |
219 | int frame_size_upper_bound; |
220 | |
221 | int profile; |
222 | const struct prores_profile *profile_info; |
223 | |
224 | int *slice_q; |
225 | |
226 | ProresThreadData *tdata; |
227 | } ProresContext; |
228 | |
229 | static void get_slice_data(ProresContext *ctx, const uint16_t *src, |
230 | ptrdiff_t linesize, int x, int y, int w, int h, |
231 | int16_t *blocks, uint16_t *emu_buf, |
232 | int mbs_per_slice, int blocks_per_mb, int is_chroma) |
233 | { |
234 | const uint16_t *esrc; |
235 | const int mb_width = 4 * blocks_per_mb; |
236 | ptrdiff_t elinesize; |
237 | int i, j, k; |
238 | |
239 | for (i = 0; i < mbs_per_slice; i++, src += mb_width) { |
240 | if (x >= w) { |
241 | memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb |
242 | * sizeof(*blocks)); |
243 | return; |
244 | } |
245 | if (x + mb_width <= w && y + 16 <= h) { |
246 | esrc = src; |
247 | elinesize = linesize; |
248 | } else { |
249 | int bw, bh, pix; |
250 | |
251 | esrc = emu_buf; |
252 | elinesize = 16 * sizeof(*emu_buf); |
253 | |
254 | bw = FFMIN(w - x, mb_width); |
255 | bh = FFMIN(h - y, 16); |
256 | |
257 | for (j = 0; j < bh; j++) { |
258 | memcpy(emu_buf + j * 16, |
259 | (const uint8_t*)src + j * linesize, |
260 | bw * sizeof(*src)); |
261 | pix = emu_buf[j * 16 + bw - 1]; |
262 | for (k = bw; k < mb_width; k++) |
263 | emu_buf[j * 16 + k] = pix; |
264 | } |
265 | for (; j < 16; j++) |
266 | memcpy(emu_buf + j * 16, |
267 | emu_buf + (bh - 1) * 16, |
268 | mb_width * sizeof(*emu_buf)); |
269 | } |
270 | if (!is_chroma) { |
271 | ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks); |
272 | blocks += 64; |
273 | if (blocks_per_mb > 2) { |
274 | ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks); |
275 | blocks += 64; |
276 | } |
277 | ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks); |
278 | blocks += 64; |
279 | if (blocks_per_mb > 2) { |
280 | ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
281 | blocks += 64; |
282 | } |
283 | } else { |
284 | ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks); |
285 | blocks += 64; |
286 | ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks); |
287 | blocks += 64; |
288 | if (blocks_per_mb > 2) { |
289 | ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks); |
290 | blocks += 64; |
291 | ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks); |
292 | blocks += 64; |
293 | } |
294 | } |
295 | |
296 | x += mb_width; |
297 | } |
298 | } |
299 | |
300 | static void get_alpha_data(ProresContext *ctx, const uint16_t *src, |
301 | ptrdiff_t linesize, int x, int y, int w, int h, |
302 | int16_t *blocks, int mbs_per_slice, int abits) |
303 | { |
304 | const int slice_width = 16 * mbs_per_slice; |
305 | int i, j, copy_w, copy_h; |
306 | |
307 | copy_w = FFMIN(w - x, slice_width); |
308 | copy_h = FFMIN(h - y, 16); |
309 | for (i = 0; i < copy_h; i++) { |
310 | memcpy(blocks, src, copy_w * sizeof(*src)); |
311 | if (abits == 8) |
312 | for (j = 0; j < copy_w; j++) |
313 | blocks[j] >>= 2; |
314 | else |
315 | for (j = 0; j < copy_w; j++) |
316 | blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4); |
317 | for (j = copy_w; j < slice_width; j++) |
318 | blocks[j] = blocks[copy_w - 1]; |
319 | blocks += slice_width; |
320 | src += linesize >> 1; |
321 | } |
322 | for (; i < 16; i++) { |
323 | memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks)); |
324 | blocks += slice_width; |
325 | } |
326 | } |
327 | |
328 | /** |
329 | * Write an unsigned rice/exp golomb codeword. |
330 | */ |
331 | static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val) |
332 | { |
333 | unsigned int rice_order, exp_order, switch_bits, switch_val; |
334 | int exponent; |
335 | |
336 | /* number of prefix bits to switch between Rice and expGolomb */ |
337 | switch_bits = (codebook & 3) + 1; |
338 | rice_order = codebook >> 5; /* rice code order */ |
339 | exp_order = (codebook >> 2) & 7; /* exp golomb code order */ |
340 | |
341 | switch_val = switch_bits << rice_order; |
342 | |
343 | if (val >= switch_val) { |
344 | val -= switch_val - (1 << exp_order); |
345 | exponent = av_log2(val); |
346 | |
347 | put_bits(pb, exponent - exp_order + switch_bits, 0); |
348 | put_bits(pb, exponent + 1, val); |
349 | } else { |
350 | exponent = val >> rice_order; |
351 | |
352 | if (exponent) |
353 | put_bits(pb, exponent, 0); |
354 | put_bits(pb, 1, 1); |
355 | if (rice_order) |
356 | put_sbits(pb, rice_order, val); |
357 | } |
358 | } |
359 | |
/* -1 for negative x, 0 otherwise (relies on a 32-bit arithmetic shift). */
#define GET_SIGN(x)  ((x) >> 31)
/* Fold a signed value into an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
#define MAKE_CODE(x) (GET_SIGN(x) ^ ((x) << 1))
362 | |
363 | static void encode_dcs(PutBitContext *pb, int16_t *blocks, |
364 | int blocks_per_slice, int scale) |
365 | { |
366 | int i; |
367 | int codebook = 3, code, dc, prev_dc, delta, sign, new_sign; |
368 | |
369 | prev_dc = (blocks[0] - 0x4000) / scale; |
370 | encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc)); |
371 | sign = 0; |
372 | codebook = 3; |
373 | blocks += 64; |
374 | |
375 | for (i = 1; i < blocks_per_slice; i++, blocks += 64) { |
376 | dc = (blocks[0] - 0x4000) / scale; |
377 | delta = dc - prev_dc; |
378 | new_sign = GET_SIGN(delta); |
379 | delta = (delta ^ sign) - sign; |
380 | code = MAKE_CODE(delta); |
381 | encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code); |
382 | codebook = (code + (code & 1)) >> 1; |
383 | codebook = FFMIN(codebook, 3); |
384 | sign = new_sign; |
385 | prev_dc = dc; |
386 | } |
387 | } |
388 | |
389 | static void encode_acs(PutBitContext *pb, int16_t *blocks, |
390 | int blocks_per_slice, |
391 | int plane_size_factor, |
392 | const uint8_t *scan, const int16_t *qmat) |
393 | { |
394 | int idx, i; |
395 | int run, level, run_cb, lev_cb; |
396 | int max_coeffs, abs_level; |
397 | |
398 | max_coeffs = blocks_per_slice << 6; |
399 | run_cb = ff_prores_run_to_cb_index[4]; |
400 | lev_cb = ff_prores_lev_to_cb_index[2]; |
401 | run = 0; |
402 | |
403 | for (i = 1; i < 64; i++) { |
404 | for (idx = scan[i]; idx < max_coeffs; idx += 64) { |
405 | level = blocks[idx] / qmat[scan[i]]; |
406 | if (level) { |
407 | abs_level = FFABS(level); |
408 | encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run); |
409 | encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb], |
410 | abs_level - 1); |
411 | put_sbits(pb, 1, GET_SIGN(level)); |
412 | |
413 | run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)]; |
414 | lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)]; |
415 | run = 0; |
416 | } else { |
417 | run++; |
418 | } |
419 | } |
420 | } |
421 | } |
422 | |
423 | static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb, |
424 | const uint16_t *src, ptrdiff_t linesize, |
425 | int mbs_per_slice, int16_t *blocks, |
426 | int blocks_per_mb, int plane_size_factor, |
427 | const int16_t *qmat) |
428 | { |
429 | int blocks_per_slice, saved_pos; |
430 | |
431 | saved_pos = put_bits_count(pb); |
432 | blocks_per_slice = mbs_per_slice * blocks_per_mb; |
433 | |
434 | encode_dcs(pb, blocks, blocks_per_slice, qmat[0]); |
435 | encode_acs(pb, blocks, blocks_per_slice, plane_size_factor, |
436 | ctx->scantable, qmat); |
437 | flush_put_bits(pb); |
438 | |
439 | return (put_bits_count(pb) - saved_pos) >> 3; |
440 | } |
441 | |
442 | static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits) |
443 | { |
444 | const int dbits = (abits == 8) ? 4 : 7; |
445 | const int dsize = 1 << dbits - 1; |
446 | int diff = cur - prev; |
447 | |
448 | diff = av_mod_uintp2(diff, abits); |
449 | if (diff >= (1 << abits) - dsize) |
450 | diff -= 1 << abits; |
451 | if (diff < -dsize || diff > dsize || !diff) { |
452 | put_bits(pb, 1, 1); |
453 | put_bits(pb, abits, diff); |
454 | } else { |
455 | put_bits(pb, 1, 0); |
456 | put_bits(pb, dbits - 1, FFABS(diff) - 1); |
457 | put_bits(pb, 1, diff < 0); |
458 | } |
459 | } |
460 | |
461 | static void put_alpha_run(PutBitContext *pb, int run) |
462 | { |
463 | if (run) { |
464 | put_bits(pb, 1, 0); |
465 | if (run < 0x10) |
466 | put_bits(pb, 4, run); |
467 | else |
468 | put_bits(pb, 15, run); |
469 | } else { |
470 | put_bits(pb, 1, 1); |
471 | } |
472 | } |
473 | |
474 | // todo alpha quantisation for high quants |
475 | static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb, |
476 | int mbs_per_slice, uint16_t *blocks, |
477 | int quant) |
478 | { |
479 | const int abits = ctx->alpha_bits; |
480 | const int mask = (1 << abits) - 1; |
481 | const int num_coeffs = mbs_per_slice * 256; |
482 | int saved_pos = put_bits_count(pb); |
483 | int prev = mask, cur; |
484 | int idx = 0; |
485 | int run = 0; |
486 | |
487 | cur = blocks[idx++]; |
488 | put_alpha_diff(pb, cur, prev, abits); |
489 | prev = cur; |
490 | do { |
491 | cur = blocks[idx++]; |
492 | if (cur != prev) { |
493 | put_alpha_run (pb, run); |
494 | put_alpha_diff(pb, cur, prev, abits); |
495 | prev = cur; |
496 | run = 0; |
497 | } else { |
498 | run++; |
499 | } |
500 | } while (idx < num_coeffs); |
501 | if (run) |
502 | put_alpha_run(pb, run); |
503 | flush_put_bits(pb); |
504 | return (put_bits_count(pb) - saved_pos) >> 3; |
505 | } |
506 | |
507 | static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, |
508 | PutBitContext *pb, |
509 | int sizes[4], int x, int y, int quant, |
510 | int mbs_per_slice) |
511 | { |
512 | ProresContext *ctx = avctx->priv_data; |
513 | int i, xp, yp; |
514 | int total_size = 0; |
515 | const uint16_t *src; |
516 | int slice_width_factor = av_log2(mbs_per_slice); |
517 | int num_cblocks, pwidth, line_add; |
518 | ptrdiff_t linesize; |
519 | int plane_factor, is_chroma; |
520 | uint16_t *qmat; |
521 | |
522 | if (ctx->pictures_per_frame == 1) |
523 | line_add = 0; |
524 | else |
525 | line_add = ctx->cur_picture_idx ^ !pic->top_field_first; |
526 | |
527 | if (ctx->force_quant) { |
528 | qmat = ctx->quants[0]; |
529 | } else if (quant < MAX_STORED_Q) { |
530 | qmat = ctx->quants[quant]; |
531 | } else { |
532 | qmat = ctx->custom_q; |
533 | for (i = 0; i < 64; i++) |
534 | qmat[i] = ctx->quant_mat[i] * quant; |
535 | } |
536 | |
537 | for (i = 0; i < ctx->num_planes; i++) { |
538 | is_chroma = (i == 1 || i == 2); |
539 | plane_factor = slice_width_factor + 2; |
540 | if (is_chroma) |
541 | plane_factor += ctx->chroma_factor - 3; |
542 | if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) { |
543 | xp = x << 4; |
544 | yp = y << 4; |
545 | num_cblocks = 4; |
546 | pwidth = avctx->width; |
547 | } else { |
548 | xp = x << 3; |
549 | yp = y << 4; |
550 | num_cblocks = 2; |
551 | pwidth = avctx->width >> 1; |
552 | } |
553 | |
554 | linesize = pic->linesize[i] * ctx->pictures_per_frame; |
555 | src = (const uint16_t*)(pic->data[i] + yp * linesize + |
556 | line_add * pic->linesize[i]) + xp; |
557 | |
558 | if (i < 3) { |
559 | get_slice_data(ctx, src, linesize, xp, yp, |
560 | pwidth, avctx->height / ctx->pictures_per_frame, |
561 | ctx->blocks[0], ctx->emu_buf, |
562 | mbs_per_slice, num_cblocks, is_chroma); |
563 | sizes[i] = encode_slice_plane(ctx, pb, src, linesize, |
564 | mbs_per_slice, ctx->blocks[0], |
565 | num_cblocks, plane_factor, |
566 | qmat); |
567 | } else { |
568 | get_alpha_data(ctx, src, linesize, xp, yp, |
569 | pwidth, avctx->height / ctx->pictures_per_frame, |
570 | ctx->blocks[0], mbs_per_slice, ctx->alpha_bits); |
571 | sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice, |
572 | ctx->blocks[0], quant); |
573 | } |
574 | total_size += sizes[i]; |
575 | if (put_bits_left(pb) < 0) { |
576 | av_log(avctx, AV_LOG_ERROR, |
577 | "Underestimated required buffer size.\n"); |
578 | return AVERROR_BUG; |
579 | } |
580 | } |
581 | return total_size; |
582 | } |
583 | |
/**
 * Return the number of bits encode_vlc_codeword() would write for val with
 * the given codebook descriptor.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  = codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit and rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val     -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
605 | |
606 | static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice, |
607 | int scale) |
608 | { |
609 | int i; |
610 | int codebook = 3, code, dc, prev_dc, delta, sign, new_sign; |
611 | int bits; |
612 | |
613 | prev_dc = (blocks[0] - 0x4000) / scale; |
614 | bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc)); |
615 | sign = 0; |
616 | codebook = 3; |
617 | blocks += 64; |
618 | *error += FFABS(blocks[0] - 0x4000) % scale; |
619 | |
620 | for (i = 1; i < blocks_per_slice; i++, blocks += 64) { |
621 | dc = (blocks[0] - 0x4000) / scale; |
622 | *error += FFABS(blocks[0] - 0x4000) % scale; |
623 | delta = dc - prev_dc; |
624 | new_sign = GET_SIGN(delta); |
625 | delta = (delta ^ sign) - sign; |
626 | code = MAKE_CODE(delta); |
627 | bits += estimate_vlc(ff_prores_dc_codebook[codebook], code); |
628 | codebook = (code + (code & 1)) >> 1; |
629 | codebook = FFMIN(codebook, 3); |
630 | sign = new_sign; |
631 | prev_dc = dc; |
632 | } |
633 | |
634 | return bits; |
635 | } |
636 | |
637 | static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice, |
638 | int plane_size_factor, |
639 | const uint8_t *scan, const int16_t *qmat) |
640 | { |
641 | int idx, i; |
642 | int run, level, run_cb, lev_cb; |
643 | int max_coeffs, abs_level; |
644 | int bits = 0; |
645 | |
646 | max_coeffs = blocks_per_slice << 6; |
647 | run_cb = ff_prores_run_to_cb_index[4]; |
648 | lev_cb = ff_prores_lev_to_cb_index[2]; |
649 | run = 0; |
650 | |
651 | for (i = 1; i < 64; i++) { |
652 | for (idx = scan[i]; idx < max_coeffs; idx += 64) { |
653 | level = blocks[idx] / qmat[scan[i]]; |
654 | *error += FFABS(blocks[idx]) % qmat[scan[i]]; |
655 | if (level) { |
656 | abs_level = FFABS(level); |
657 | bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run); |
658 | bits += estimate_vlc(ff_prores_ac_codebook[lev_cb], |
659 | abs_level - 1) + 1; |
660 | |
661 | run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)]; |
662 | lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)]; |
663 | run = 0; |
664 | } else { |
665 | run++; |
666 | } |
667 | } |
668 | } |
669 | |
670 | return bits; |
671 | } |
672 | |
673 | static int estimate_slice_plane(ProresContext *ctx, int *error, int plane, |
674 | const uint16_t *src, ptrdiff_t linesize, |
675 | int mbs_per_slice, |
676 | int blocks_per_mb, int plane_size_factor, |
677 | const int16_t *qmat, ProresThreadData *td) |
678 | { |
679 | int blocks_per_slice; |
680 | int bits; |
681 | |
682 | blocks_per_slice = mbs_per_slice * blocks_per_mb; |
683 | |
684 | bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]); |
685 | bits += estimate_acs(error, td->blocks[plane], blocks_per_slice, |
686 | plane_size_factor, ctx->scantable, qmat); |
687 | |
688 | return FFALIGN(bits, 8); |
689 | } |
690 | |
/**
 * Return the number of bits put_alpha_diff() would write for the step from
 * prev to cur: dbits + 1 for the short form, abits + 1 for the long form.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    const int wrap  = 1 << abits;
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= wrap - dsize)
        diff -= wrap;

    return (diff && diff >= -dsize && diff <= dsize) ? dbits + 1 : abits + 1;
}
705 | |
706 | static int estimate_alpha_plane(ProresContext *ctx, int *error, |
707 | const uint16_t *src, ptrdiff_t linesize, |
708 | int mbs_per_slice, int quant, |
709 | int16_t *blocks) |
710 | { |
711 | const int abits = ctx->alpha_bits; |
712 | const int mask = (1 << abits) - 1; |
713 | const int num_coeffs = mbs_per_slice * 256; |
714 | int prev = mask, cur; |
715 | int idx = 0; |
716 | int run = 0; |
717 | int bits; |
718 | |
719 | *error = 0; |
720 | cur = blocks[idx++]; |
721 | bits = est_alpha_diff(cur, prev, abits); |
722 | prev = cur; |
723 | do { |
724 | cur = blocks[idx++]; |
725 | if (cur != prev) { |
726 | if (!run) |
727 | bits++; |
728 | else if (run < 0x10) |
729 | bits += 4; |
730 | else |
731 | bits += 15; |
732 | bits += est_alpha_diff(cur, prev, abits); |
733 | prev = cur; |
734 | run = 0; |
735 | } else { |
736 | run++; |
737 | } |
738 | } while (idx < num_coeffs); |
739 | |
740 | if (run) { |
741 | if (run < 0x10) |
742 | bits += 4; |
743 | else |
744 | bits += 15; |
745 | } |
746 | |
747 | return bits; |
748 | } |
749 | |
750 | static int find_slice_quant(AVCodecContext *avctx, |
751 | int trellis_node, int x, int y, int mbs_per_slice, |
752 | ProresThreadData *td) |
753 | { |
754 | ProresContext *ctx = avctx->priv_data; |
755 | int i, q, pq, xp, yp; |
756 | const uint16_t *src; |
757 | int slice_width_factor = av_log2(mbs_per_slice); |
758 | int num_cblocks[MAX_PLANES], pwidth; |
759 | int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES]; |
760 | const int min_quant = ctx->profile_info->min_quant; |
761 | const int max_quant = ctx->profile_info->max_quant; |
762 | int error, bits, bits_limit; |
763 | int mbs, prev, cur, new_score; |
764 | int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH]; |
765 | int overquant; |
766 | uint16_t *qmat; |
767 | int linesize[4], line_add; |
768 | |
769 | if (ctx->pictures_per_frame == 1) |
770 | line_add = 0; |
771 | else |
772 | line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first; |
773 | mbs = x + mbs_per_slice; |
774 | |
775 | for (i = 0; i < ctx->num_planes; i++) { |
776 | is_chroma[i] = (i == 1 || i == 2); |
777 | plane_factor[i] = slice_width_factor + 2; |
778 | if (is_chroma[i]) |
779 | plane_factor[i] += ctx->chroma_factor - 3; |
780 | if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) { |
781 | xp = x << 4; |
782 | yp = y << 4; |
783 | num_cblocks[i] = 4; |
784 | pwidth = avctx->width; |
785 | } else { |
786 | xp = x << 3; |
787 | yp = y << 4; |
788 | num_cblocks[i] = 2; |
789 | pwidth = avctx->width >> 1; |
790 | } |
791 | |
792 | linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame; |
793 | src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] + |
794 | line_add * ctx->pic->linesize[i]) + xp; |
795 | |
796 | if (i < 3) { |
797 | get_slice_data(ctx, src, linesize[i], xp, yp, |
798 | pwidth, avctx->height / ctx->pictures_per_frame, |
799 | td->blocks[i], td->emu_buf, |
800 | mbs_per_slice, num_cblocks[i], is_chroma[i]); |
801 | } else { |
802 | get_alpha_data(ctx, src, linesize[i], xp, yp, |
803 | pwidth, avctx->height / ctx->pictures_per_frame, |
804 | td->blocks[i], mbs_per_slice, ctx->alpha_bits); |
805 | } |
806 | } |
807 | |
808 | for (q = min_quant; q < max_quant + 2; q++) { |
809 | td->nodes[trellis_node + q].prev_node = -1; |
810 | td->nodes[trellis_node + q].quant = q; |
811 | } |
812 | |
813 | // todo: maybe perform coarser quantising to fit into frame size when needed |
814 | for (q = min_quant; q <= max_quant; q++) { |
815 | bits = 0; |
816 | error = 0; |
817 | for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) { |
818 | bits += estimate_slice_plane(ctx, &error, i, |
819 | src, linesize[i], |
820 | mbs_per_slice, |
821 | num_cblocks[i], plane_factor[i], |
822 | ctx->quants[q], td); |
823 | } |
824 | if (ctx->alpha_bits) |
825 | bits += estimate_alpha_plane(ctx, &error, src, linesize[3], |
826 | mbs_per_slice, q, td->blocks[3]); |
827 | if (bits > 65000 * 8) |
828 | error = SCORE_LIMIT; |
829 | |
830 | slice_bits[q] = bits; |
831 | slice_score[q] = error; |
832 | } |
833 | if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) { |
834 | slice_bits[max_quant + 1] = slice_bits[max_quant]; |
835 | slice_score[max_quant + 1] = slice_score[max_quant] + 1; |
836 | overquant = max_quant; |
837 | } else { |
838 | for (q = max_quant + 1; q < 128; q++) { |
839 | bits = 0; |
840 | error = 0; |
841 | if (q < MAX_STORED_Q) { |
842 | qmat = ctx->quants[q]; |
843 | } else { |
844 | qmat = td->custom_q; |
845 | for (i = 0; i < 64; i++) |
846 | qmat[i] = ctx->quant_mat[i] * q; |
847 | } |
848 | for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) { |
849 | bits += estimate_slice_plane(ctx, &error, i, |
850 | src, linesize[i], |
851 | mbs_per_slice, |
852 | num_cblocks[i], plane_factor[i], |
853 | qmat, td); |
854 | } |
855 | if (ctx->alpha_bits) |
856 | bits += estimate_alpha_plane(ctx, &error, src, linesize[3], |
857 | mbs_per_slice, q, td->blocks[3]); |
858 | if (bits <= ctx->bits_per_mb * mbs_per_slice) |
859 | break; |
860 | } |
861 | |
862 | slice_bits[max_quant + 1] = bits; |
863 | slice_score[max_quant + 1] = error; |
864 | overquant = q; |
865 | } |
866 | td->nodes[trellis_node + max_quant + 1].quant = overquant; |
867 | |
868 | bits_limit = mbs * ctx->bits_per_mb; |
869 | for (pq = min_quant; pq < max_quant + 2; pq++) { |
870 | prev = trellis_node - TRELLIS_WIDTH + pq; |
871 | |
872 | for (q = min_quant; q < max_quant + 2; q++) { |
873 | cur = trellis_node + q; |
874 | |
875 | bits = td->nodes[prev].bits + slice_bits[q]; |
876 | error = slice_score[q]; |
877 | if (bits > bits_limit) |
878 | error = SCORE_LIMIT; |
879 | |
880 | if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT) |
881 | new_score = td->nodes[prev].score + error; |
882 | else |
883 | new_score = SCORE_LIMIT; |
884 | if (td->nodes[cur].prev_node == -1 || |
885 | td->nodes[cur].score >= new_score) { |
886 | |
887 | td->nodes[cur].bits = bits; |
888 | td->nodes[cur].score = new_score; |
889 | td->nodes[cur].prev_node = prev; |
890 | } |
891 | } |
892 | } |
893 | |
894 | error = td->nodes[trellis_node + min_quant].score; |
895 | pq = trellis_node + min_quant; |
896 | for (q = min_quant + 1; q < max_quant + 2; q++) { |
897 | if (td->nodes[trellis_node + q].score <= error) { |
898 | error = td->nodes[trellis_node + q].score; |
899 | pq = trellis_node + q; |
900 | } |
901 | } |
902 | |
903 | return pq; |
904 | } |
905 | |
906 | static int find_quant_thread(AVCodecContext *avctx, void *arg, |
907 | int jobnr, int threadnr) |
908 | { |
909 | ProresContext *ctx = avctx->priv_data; |
910 | ProresThreadData *td = ctx->tdata + threadnr; |
911 | int mbs_per_slice = ctx->mbs_per_slice; |
912 | int x, y = jobnr, mb, q = 0; |
913 | |
914 | for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) { |
915 | while (ctx->mb_width - x < mbs_per_slice) |
916 | mbs_per_slice >>= 1; |
917 | q = find_slice_quant(avctx, |
918 | (mb + 1) * TRELLIS_WIDTH, x, y, |
919 | mbs_per_slice, td); |
920 | } |
921 | |
922 | for (x = ctx->slices_width - 1; x >= 0; x--) { |
923 | ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant; |
924 | q = td->nodes[q].prev_node; |
925 | } |
926 | |
927 | return 0; |
928 | } |
929 | |
930 | static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
931 | const AVFrame *pic, int *got_packet) |
932 | { |
933 | ProresContext *ctx = avctx->priv_data; |
934 | uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp; |
935 | uint8_t *picture_size_pos; |
936 | PutBitContext pb; |
937 | int x, y, i, mb, q = 0; |
938 | int sizes[4] = { 0 }; |
939 | int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1); |
940 | int frame_size, picture_size, slice_size; |
941 | int pkt_size, ret; |
942 | int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1); |
943 | uint8_t frame_flags; |
944 | |
945 | ctx->pic = pic; |
946 | pkt_size = ctx->frame_size_upper_bound; |
947 | |
948 | if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0) |
949 | return ret; |
950 | |
951 | orig_buf = pkt->data; |
952 | |
953 | // frame atom |
954 | orig_buf += 4; // frame size |
955 | bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID |
956 | buf = orig_buf; |
957 | |
958 | // frame header |
959 | tmp = buf; |
960 | buf += 2; // frame header size will be stored here |
961 | bytestream_put_be16 (&buf, 0); // version 1 |
962 | bytestream_put_buffer(&buf, ctx->vendor, 4); |
963 | bytestream_put_be16 (&buf, avctx->width); |
964 | bytestream_put_be16 (&buf, avctx->height); |
965 | |
966 | frame_flags = ctx->chroma_factor << 6; |
967 | if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) |
968 | frame_flags |= pic->top_field_first ? 0x04 : 0x08; |
969 | bytestream_put_byte (&buf, frame_flags); |
970 | |
971 | bytestream_put_byte (&buf, 0); // reserved |
972 | bytestream_put_byte (&buf, avctx->color_primaries); |
973 | bytestream_put_byte (&buf, avctx->color_trc); |
974 | bytestream_put_byte (&buf, avctx->colorspace); |
975 | bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3)); |
976 | bytestream_put_byte (&buf, 0); // reserved |
977 | if (ctx->quant_sel != QUANT_MAT_DEFAULT) { |
978 | bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present |
979 | // luma quantisation matrix |
980 | for (i = 0; i < 64; i++) |
981 | bytestream_put_byte(&buf, ctx->quant_mat[i]); |
982 | // chroma quantisation matrix |
983 | for (i = 0; i < 64; i++) |
984 | bytestream_put_byte(&buf, ctx->quant_mat[i]); |
985 | } else { |
986 | bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used |
987 | } |
988 | bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size |
989 | |
990 | for (ctx->cur_picture_idx = 0; |
991 | ctx->cur_picture_idx < ctx->pictures_per_frame; |
992 | ctx->cur_picture_idx++) { |
993 | // picture header |
994 | picture_size_pos = buf + 1; |
995 | bytestream_put_byte (&buf, 0x40); // picture header size (in bits) |
996 | buf += 4; // picture data size will be stored here |
997 | bytestream_put_be16 (&buf, ctx->slices_per_picture); |
998 | bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs |
999 | |
1000 | // seek table - will be filled during slice encoding |
1001 | slice_sizes = buf; |
1002 | buf += ctx->slices_per_picture * 2; |
1003 | |
1004 | // slices |
1005 | if (!ctx->force_quant) { |
1006 | ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL, |
1007 | ctx->mb_height); |
1008 | if (ret) |
1009 | return ret; |
1010 | } |
1011 | |
1012 | for (y = 0; y < ctx->mb_height; y++) { |
1013 | int mbs_per_slice = ctx->mbs_per_slice; |
1014 | for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) { |
1015 | q = ctx->force_quant ? ctx->force_quant |
1016 | : ctx->slice_q[mb + y * ctx->slices_width]; |
1017 | |
1018 | while (ctx->mb_width - x < mbs_per_slice) |
1019 | mbs_per_slice >>= 1; |
1020 | |
1021 | bytestream_put_byte(&buf, slice_hdr_size << 3); |
1022 | slice_hdr = buf; |
1023 | buf += slice_hdr_size - 1; |
1024 | if (pkt_size <= buf - orig_buf + 2 * max_slice_size) { |
1025 | uint8_t *start = pkt->data; |
1026 | // Recompute new size according to max_slice_size |
1027 | // and deduce delta |
1028 | int delta = 200 + (ctx->pictures_per_frame * |
1029 | ctx->slices_per_picture + 1) * |
1030 | max_slice_size - pkt_size; |
1031 | |
1032 | delta = FFMAX(delta, 2 * max_slice_size); |
1033 | ctx->frame_size_upper_bound += delta; |
1034 | |
1035 | if (!ctx->warn) { |
1036 | avpriv_request_sample(avctx, |
1037 | "Packet too small: is %i," |
1038 | " needs %i (slice: %i). " |
1039 | "Correct allocation", |
1040 | pkt_size, delta, max_slice_size); |
1041 | ctx->warn = 1; |
1042 | } |
1043 | |
1044 | ret = av_grow_packet(pkt, delta); |
1045 | if (ret < 0) |
1046 | return ret; |
1047 | |
1048 | pkt_size += delta; |
1049 | // restore pointers |
1050 | orig_buf = pkt->data + (orig_buf - start); |
1051 | buf = pkt->data + (buf - start); |
1052 | picture_size_pos = pkt->data + (picture_size_pos - start); |
1053 | slice_sizes = pkt->data + (slice_sizes - start); |
1054 | slice_hdr = pkt->data + (slice_hdr - start); |
1055 | tmp = pkt->data + (tmp - start); |
1056 | } |
1057 | init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf))); |
1058 | ret = encode_slice(avctx, pic, &pb, sizes, x, y, q, |
1059 | mbs_per_slice); |
1060 | if (ret < 0) |
1061 | return ret; |
1062 | |
1063 | bytestream_put_byte(&slice_hdr, q); |
1064 | slice_size = slice_hdr_size + sizes[ctx->num_planes - 1]; |
1065 | for (i = 0; i < ctx->num_planes - 1; i++) { |
1066 | bytestream_put_be16(&slice_hdr, sizes[i]); |
1067 | slice_size += sizes[i]; |
1068 | } |
1069 | bytestream_put_be16(&slice_sizes, slice_size); |
1070 | buf += slice_size - slice_hdr_size; |
1071 | if (max_slice_size < slice_size) |
1072 | max_slice_size = slice_size; |
1073 | } |
1074 | } |
1075 | |
1076 | picture_size = buf - (picture_size_pos - 1); |
1077 | bytestream_put_be32(&picture_size_pos, picture_size); |
1078 | } |
1079 | |
1080 | orig_buf -= 8; |
1081 | frame_size = buf - orig_buf; |
1082 | bytestream_put_be32(&orig_buf, frame_size); |
1083 | |
1084 | pkt->size = frame_size; |
1085 | pkt->flags |= AV_PKT_FLAG_KEY; |
1086 | *got_packet = 1; |
1087 | |
1088 | return 0; |
1089 | } |
1090 | |
1091 | static av_cold int encode_close(AVCodecContext *avctx) |
1092 | { |
1093 | ProresContext *ctx = avctx->priv_data; |
1094 | int i; |
1095 | |
1096 | if (ctx->tdata) { |
1097 | for (i = 0; i < avctx->thread_count; i++) |
1098 | av_freep(&ctx->tdata[i].nodes); |
1099 | } |
1100 | av_freep(&ctx->tdata); |
1101 | av_freep(&ctx->slice_q); |
1102 | |
1103 | return 0; |
1104 | } |
1105 | |
1106 | static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src, |
1107 | ptrdiff_t linesize, int16_t *block) |
1108 | { |
1109 | int x, y; |
1110 | const uint16_t *tsrc = src; |
1111 | |
1112 | for (y = 0; y < 8; y++) { |
1113 | for (x = 0; x < 8; x++) |
1114 | block[y * 8 + x] = tsrc[x]; |
1115 | tsrc += linesize >> 1; |
1116 | } |
1117 | fdsp->fdct(block); |
1118 | } |
1119 | |
1120 | static av_cold int encode_init(AVCodecContext *avctx) |
1121 | { |
1122 | ProresContext *ctx = avctx->priv_data; |
1123 | int mps; |
1124 | int i, j; |
1125 | int min_quant, max_quant; |
1126 | int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT); |
1127 | |
1128 | avctx->bits_per_raw_sample = 10; |
1129 | #if FF_API_CODED_FRAME |
1130 | FF_DISABLE_DEPRECATION_WARNINGS |
1131 | avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; |
1132 | avctx->coded_frame->key_frame = 1; |
1133 | FF_ENABLE_DEPRECATION_WARNINGS |
1134 | #endif |
1135 | |
1136 | ctx->fdct = prores_fdct; |
1137 | ctx->scantable = interlaced ? ff_prores_interlaced_scan |
1138 | : ff_prores_progressive_scan; |
1139 | ff_fdctdsp_init(&ctx->fdsp, avctx); |
1140 | |
1141 | mps = ctx->mbs_per_slice; |
1142 | if (mps & (mps - 1)) { |
1143 | av_log(avctx, AV_LOG_ERROR, |
1144 | "there should be an integer power of two MBs per slice\n"); |
1145 | return AVERROR(EINVAL); |
1146 | } |
1147 | if (ctx->profile == PRORES_PROFILE_AUTO) { |
1148 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt); |
1149 | ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA || |
1150 | !(desc->log2_chroma_w + desc->log2_chroma_h)) |
1151 | ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ; |
1152 | av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden " |
1153 | "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444 |
1154 | ? "4:4:4:4 profile because of the used input colorspace" |
1155 | : "HQ profile to keep best quality"); |
1156 | } |
1157 | if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) { |
1158 | if (ctx->profile != PRORES_PROFILE_4444) { |
1159 | // force alpha and warn |
1160 | av_log(avctx, AV_LOG_WARNING, "Profile selected will not " |
1161 | "encode alpha. Override with -profile if needed.\n"); |
1162 | ctx->alpha_bits = 0; |
1163 | } |
1164 | if (ctx->alpha_bits & 7) { |
1165 | av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n"); |
1166 | return AVERROR(EINVAL); |
1167 | } |
1168 | avctx->bits_per_coded_sample = 32; |
1169 | } else { |
1170 | ctx->alpha_bits = 0; |
1171 | } |
1172 | |
1173 | ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10 |
1174 | ? CFACTOR_Y422 |
1175 | : CFACTOR_Y444; |
1176 | ctx->profile_info = prores_profile_info + ctx->profile; |
1177 | ctx->num_planes = 3 + !!ctx->alpha_bits; |
1178 | |
1179 | ctx->mb_width = FFALIGN(avctx->width, 16) >> 4; |
1180 | |
1181 | if (interlaced) |
1182 | ctx->mb_height = FFALIGN(avctx->height, 32) >> 5; |
1183 | else |
1184 | ctx->mb_height = FFALIGN(avctx->height, 16) >> 4; |
1185 | |
1186 | ctx->slices_width = ctx->mb_width / mps; |
1187 | ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps); |
1188 | ctx->slices_per_picture = ctx->mb_height * ctx->slices_width; |
1189 | ctx->pictures_per_frame = 1 + interlaced; |
1190 | |
1191 | if (ctx->quant_sel == -1) |
1192 | ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant]; |
1193 | else |
1194 | ctx->quant_mat = prores_quant_matrices[ctx->quant_sel]; |
1195 | |
1196 | if (strlen(ctx->vendor) != 4) { |
1197 | av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n"); |
1198 | return AVERROR_INVALIDDATA; |
1199 | } |
1200 | |
1201 | ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA; |
1202 | if (!ctx->force_quant) { |
1203 | if (!ctx->bits_per_mb) { |
1204 | for (i = 0; i < NUM_MB_LIMITS - 1; i++) |
1205 | if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height * |
1206 | ctx->pictures_per_frame) |
1207 | break; |
1208 | ctx->bits_per_mb = ctx->profile_info->br_tab[i]; |
1209 | } else if (ctx->bits_per_mb < 128) { |
1210 | av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n"); |
1211 | return AVERROR_INVALIDDATA; |
1212 | } |
1213 | |
1214 | min_quant = ctx->profile_info->min_quant; |
1215 | max_quant = ctx->profile_info->max_quant; |
1216 | for (i = min_quant; i < MAX_STORED_Q; i++) { |
1217 | for (j = 0; j < 64; j++) |
1218 | ctx->quants[i][j] = ctx->quant_mat[j] * i; |
1219 | } |
1220 | |
1221 | ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q)); |
1222 | if (!ctx->slice_q) { |
1223 | encode_close(avctx); |
1224 | return AVERROR(ENOMEM); |
1225 | } |
1226 | |
1227 | ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata)); |
1228 | if (!ctx->tdata) { |
1229 | encode_close(avctx); |
1230 | return AVERROR(ENOMEM); |
1231 | } |
1232 | |
1233 | for (j = 0; j < avctx->thread_count; j++) { |
1234 | ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1) |
1235 | * TRELLIS_WIDTH |
1236 | * sizeof(*ctx->tdata->nodes)); |
1237 | if (!ctx->tdata[j].nodes) { |
1238 | encode_close(avctx); |
1239 | return AVERROR(ENOMEM); |
1240 | } |
1241 | for (i = min_quant; i < max_quant + 2; i++) { |
1242 | ctx->tdata[j].nodes[i].prev_node = -1; |
1243 | ctx->tdata[j].nodes[i].bits = 0; |
1244 | ctx->tdata[j].nodes[i].score = 0; |
1245 | } |
1246 | } |
1247 | } else { |
1248 | int ls = 0; |
1249 | |
1250 | if (ctx->force_quant > 64) { |
1251 | av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n"); |
1252 | return AVERROR_INVALIDDATA; |
1253 | } |
1254 | |
1255 | for (j = 0; j < 64; j++) { |
1256 | ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant; |
1257 | ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1; |
1258 | } |
1259 | |
1260 | ctx->bits_per_mb = ls * 8; |
1261 | if (ctx->chroma_factor == CFACTOR_Y444) |
1262 | ctx->bits_per_mb += ls * 4; |
1263 | } |
1264 | |
1265 | ctx->frame_size_upper_bound = (ctx->pictures_per_frame * |
1266 | ctx->slices_per_picture + 1) * |
1267 | (2 + 2 * ctx->num_planes + |
1268 | (mps * ctx->bits_per_mb) / 8) |
1269 | + 200; |
1270 | |
1271 | if (ctx->alpha_bits) { |
1272 | // The alpha plane is run-coded and might exceed the bit budget. |
1273 | ctx->frame_size_upper_bound += (ctx->pictures_per_frame * |
1274 | ctx->slices_per_picture + 1) * |
1275 | /* num pixels per slice */ (ctx->mbs_per_slice * 256 * |
1276 | /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3); |
1277 | } |
1278 | |
1279 | avctx->codec_tag = ctx->profile_info->tag; |
1280 | |
1281 | av_log(avctx, AV_LOG_DEBUG, |
1282 | "profile %d, %d slices, interlacing: %s, %d bits per MB\n", |
1283 | ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame, |
1284 | interlaced ? "yes" : "no", ctx->bits_per_mb); |
1285 | av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n", |
1286 | ctx->frame_size_upper_bound); |
1287 | |
1288 | return 0; |
1289 | } |
1290 | |
1291 | #define OFFSET(x) offsetof(ProresContext, x) |
1292 | #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
1293 | |
1294 | static const AVOption options[] = { |
1295 | { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice), |
1296 | AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE }, |
1297 | { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT, |
1298 | { .i64 = PRORES_PROFILE_AUTO }, |
1299 | PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" }, |
1300 | { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO }, |
1301 | 0, 0, VE, "profile" }, |
1302 | { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY }, |
1303 | 0, 0, VE, "profile" }, |
1304 | { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT }, |
1305 | 0, 0, VE, "profile" }, |
1306 | { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD }, |
1307 | 0, 0, VE, "profile" }, |
1308 | { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ }, |
1309 | 0, 0, VE, "profile" }, |
1310 | { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 }, |
1311 | 0, 0, VE, "profile" }, |
1312 | { "vendor", "vendor ID", OFFSET(vendor), |
1313 | AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE }, |
1314 | { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb), |
1315 | AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE }, |
1316 | { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT, |
1317 | { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" }, |
1318 | { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, |
1319 | 0, 0, VE, "quant_mat" }, |
1320 | { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY }, |
1321 | 0, 0, VE, "quant_mat" }, |
1322 | { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT }, |
1323 | 0, 0, VE, "quant_mat" }, |
1324 | { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD }, |
1325 | 0, 0, VE, "quant_mat" }, |
1326 | { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ }, |
1327 | 0, 0, VE, "quant_mat" }, |
1328 | { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT }, |
1329 | 0, 0, VE, "quant_mat" }, |
1330 | { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT, |
1331 | { .i64 = 16 }, 0, 16, VE }, |
1332 | { NULL } |
1333 | }; |
1334 | |
1335 | static const AVClass proresenc_class = { |
1336 | .class_name = "ProRes encoder", |
1337 | .item_name = av_default_item_name, |
1338 | .option = options, |
1339 | .version = LIBAVUTIL_VERSION_INT, |
1340 | }; |
1341 | |
1342 | AVCodec ff_prores_ks_encoder = { |
1343 | .name = "prores_ks", |
1344 | .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"), |
1345 | .type = AVMEDIA_TYPE_VIDEO, |
1346 | .id = AV_CODEC_ID_PRORES, |
1347 | .priv_data_size = sizeof(ProresContext), |
1348 | .init = encode_init, |
1349 | .close = encode_close, |
1350 | .encode2 = encode_frame, |
1351 | .capabilities = AV_CODEC_CAP_SLICE_THREADS, |
1352 | .pix_fmts = (const enum AVPixelFormat[]) { |
1353 | AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, |
1354 | AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE |
1355 | }, |
1356 | .priv_class = &proresenc_class, |
1357 | }; |
1358 |