summaryrefslogtreecommitdiff
path: root/libavcodec/cinepakenc.c (plain)
blob: a28f6690708f62cec00b35f8d0fcf405b1094c02
1/*
2 * Cinepak encoder (c) 2011 Tomas Härdin
3 * http://titan.codemill.se/~tomhar/cinepakenc.patch
4 *
5 * Fixes and improvements, vintage decoders compatibility
6 * (c) 2013, 2014 Rl, Aetey Global Technologies AB
7
8Permission is hereby granted, free of charge, to any person obtaining a
9copy of this software and associated documentation files (the "Software"),
10to deal in the Software without restriction, including without limitation
11the rights to use, copy, modify, merge, publish, distribute, sublicense,
12and/or sell copies of the Software, and to permit persons to whom the
13Software is furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included
16in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24OTHER DEALINGS IN THE SOFTWARE.
25
26 * MAYBE:
27 * - "optimally" split the frame into several non-regular areas
28 * using a separate codebook pair for each area and approximating
29 * the area by several rectangular strips (generally not full width ones)
30 * (use quadtree splitting? a simple fixed-granularity grid?)
31 *
32 *
33 * version 2014-01-23 Rl
34 * - added option handling for flexibility
35 *
36 * version 2014-01-21 Rl
37 * - believe it or not, now we get even smaller files, with better quality
38 * (which means I missed an optimization earlier :)
39 *
40 * version 2014-01-20 Rl
41 * - made the encoder compatible with vintage decoders
42 * and added some yet unused code for possible future
43 * incremental codebook updates
44 * - fixed a small memory leak
45 *
46 * version 2013-04-28 Rl
47 * - bugfixed codebook optimization logic
48 *
49 * version 2013-02-14 Rl
50 * "Valentine's Day" version:
51 * - made strip division more robust
52 * - minimized bruteforcing the number of strips,
53 * (costs some R/D but speeds up compression a lot), the heuristic
54 * assumption is that score as a function of the number of strips has
55 * one wide minimum which moves slowly, of course not fully true
56 * - simplified codebook generation,
57 * the old code was meant for other optimizations than we actually do
58 * - optimized the codebook generation / error estimation for MODE_MC
59 *
60 * version 2013-02-12 Rl
61 * - separated codebook training sets, avoided the transfer of wasted bytes,
62 * which yields both better quality and smaller files
63 * - now using the correct colorspace (TODO: move conversion to libswscale)
64 *
65 * version 2013-02-08 Rl
66 * - fixes/optimization in multistrip encoding and codebook size choice,
67 * quality/bitrate is now better than that of the binary proprietary encoder
68 */
69
70#include "libavutil/intreadwrite.h"
71#include "avcodec.h"
72#include "libavutil/lfg.h"
73#include "elbg.h"
74#include "internal.h"
75
76#include "libavutil/avassert.h"
77#include "libavutil/opt.h"
78
/* Sizes, in bytes, of the fixed headers written into the bitstream. */
#define CVID_HEADER_SIZE  10
#define STRIP_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 4

/* Macroblocks (MBs) are 4x4 pixels. */
#define MB_SIZE 4
#define MB_AREA (MB_SIZE * MB_SIZE)

#define VECTOR_MAX   6   /* six or four entries per vector depending on format */
#define CODEBOOK_MAX 256 /* size of a codebook */

/*
 * Bounds on the number of strips per frame.
 *
 * - Having fewer choices regarding the number of strips speeds up encoding
 *   (obviously).
 * - Having more strips speeds up encoding the frame (this is less obvious).
 *
 * MAX_STRIPS limits the maximum quality you can reach when you want high
 * quality on high resolutions; MIN_STRIPS limits the minimum efficiently
 * encodable bit rate on low resolutions.
 *
 * The numbers are only used for brute force optimization for the first
 * frame; for the following frames they are adaptively readjusted.
 *
 * NOTE: the decoder in ffmpeg has its own arbitrary limitation on the number
 * of strips, currently 32.
 */
#define MAX_STRIPS 32
#define MIN_STRIPS 1
99
/**
 * Per-strip coding mode; decides which per-MB encodings the strip may use.
 */
typedef enum {
    MODE_V1_ONLY = 0, /* every MB is coded with a single V1 vector */
    MODE_V1_V4   = 1, /* each MB is coded as either V1 or V4 */
    MODE_MC      = 2, /* as MODE_V1_V4, plus MBs may be skipped */

    MODE_COUNT   = 3, /* number of modes, used as the loop bound */
} CinepakMode;
107
/**
 * Encoding chosen for a single macroblock.
 */
typedef enum {
    ENC_V1        = 0, /* one V1 codebook index for the whole MB */
    ENC_V4        = 1, /* four V4 codebook indices, one per 2x2 quadrant */
    ENC_SKIP      = 2, /* MB is copied from the last frame */

    ENC_UNCERTAIN = 3  /* no preferred encoding known (see CERTAIN()) */
} mb_encoding;
115
116typedef struct {
117 int v1_vector; //index into v1 codebook
118 int v1_error; //error when using V1 encoding
119 int v4_vector[4]; //indices into v4 codebook
120 int v4_error; //error when using V4 encoding
121 int skip_error; //error when block is skipped (aka copied from last frame)
122 mb_encoding best_encoding; //last result from calculate_mode_score()
123} mb_info;
124
125typedef struct {
126 int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
127 int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
128 int v1_size;
129 int v4_size;
130 CinepakMode mode;
131} strip_info;
132
133typedef struct {
134 const AVClass *class;
135 AVCodecContext *avctx;
136 unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
137 AVFrame *last_frame;
138 AVFrame *best_frame;
139 AVFrame *scratch_frame;
140 AVFrame *input_frame;
141 enum AVPixelFormat pix_fmt;
142 int w, h;
143 int frame_buf_size;
144 int curframe, keyint;
145 AVLFG randctx;
146 uint64_t lambda;
147 int *codebook_input;
148 int *codebook_closest;
149 mb_info *mb; //MB RD state
150 int min_strips; //the current limit
151 int max_strips; //the current limit
152#ifdef CINEPAKENC_DEBUG
153 mb_info *best_mb; //TODO: remove. only used for printing stats
154 int num_v1_mode, num_v4_mode, num_mc_mode;
155 int num_v1_encs, num_v4_encs, num_skips;
156#endif
157// options
158 int max_extra_cb_iterations;
159 int skip_empty_cb;
160 int min_min_strips;
161 int max_max_strips;
162 int strip_number_delta_range;
163} CinepakEncContext;
164
165#define OFFSET(x) offsetof(CinepakEncContext, x)
166#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
167static const AVOption options[] = {
168 { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
169 { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
170 { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
171 { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
172 { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
173 { NULL },
174};
175
176static const AVClass cinepak_class = {
177 .class_name = "cinepak",
178 .item_name = av_default_item_name,
179 .option = options,
180 .version = LIBAVUTIL_VERSION_INT,
181};
182
183static av_cold int cinepak_encode_init(AVCodecContext *avctx)
184{
185 CinepakEncContext *s = avctx->priv_data;
186 int x, mb_count, strip_buf_size, frame_buf_size;
187
188 if (avctx->width & 3 || avctx->height & 3) {
189 av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
190 avctx->width, avctx->height);
191 return AVERROR(EINVAL);
192 }
193
194 if (s->min_min_strips > s->max_max_strips) {
195 av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
196 s->min_min_strips, s->max_max_strips);
197 return AVERROR(EINVAL);
198 }
199
200 if (!(s->last_frame = av_frame_alloc()))
201 return AVERROR(ENOMEM);
202 if (!(s->best_frame = av_frame_alloc()))
203 goto enomem;
204 if (!(s->scratch_frame = av_frame_alloc()))
205 goto enomem;
206 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
207 if (!(s->input_frame = av_frame_alloc()))
208 goto enomem;
209
210 if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
211 goto enomem;
212
213 if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
214 goto enomem;
215
216 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
217 if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
218 goto enomem;
219
220 mb_count = avctx->width * avctx->height / MB_AREA;
221
222 //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
223 //and full codebooks being replaced in INTER mode,
224 // which is 34 bits per MB
225 //and 2*256 extra flag bits per strip
226 strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
227
228 frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
229
230 if (!(s->strip_buf = av_malloc(strip_buf_size)))
231 goto enomem;
232
233 if (!(s->frame_buf = av_malloc(frame_buf_size)))
234 goto enomem;
235
236 if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
237 goto enomem;
238
239#ifdef CINEPAKENC_DEBUG
240 if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
241 goto enomem;
242#endif
243
244 av_lfg_init(&s->randctx, 1);
245 s->avctx = avctx;
246 s->w = avctx->width;
247 s->h = avctx->height;
248 s->frame_buf_size = frame_buf_size;
249 s->curframe = 0;
250 s->keyint = avctx->keyint_min;
251 s->pix_fmt = avctx->pix_fmt;
252
253 //set up AVFrames
254 s->last_frame->data[0] = s->pict_bufs[0];
255 s->last_frame->linesize[0] = s->w;
256 s->best_frame->data[0] = s->pict_bufs[1];
257 s->best_frame->linesize[0] = s->w;
258 s->scratch_frame->data[0] = s->pict_bufs[2];
259 s->scratch_frame->linesize[0] = s->w;
260
261 if (s->pix_fmt == AV_PIX_FMT_RGB24) {
262 s->last_frame->data[1] = s->last_frame->data[0] + s->w * s->h;
263 s->last_frame->data[2] = s->last_frame->data[1] + ((s->w * s->h) >> 2);
264 s->last_frame->linesize[1] = s->last_frame->linesize[2] = s->w >> 1;
265
266 s->best_frame->data[1] = s->best_frame->data[0] + s->w * s->h;
267 s->best_frame->data[2] = s->best_frame->data[1] + ((s->w * s->h) >> 2);
268 s->best_frame->linesize[1] = s->best_frame->linesize[2] = s->w >> 1;
269
270 s->scratch_frame->data[1] = s->scratch_frame->data[0] + s->w * s->h;
271 s->scratch_frame->data[2] = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
272 s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
273
274 s->input_frame->data[0] = s->pict_bufs[3];
275 s->input_frame->linesize[0] = s->w;
276 s->input_frame->data[1] = s->input_frame->data[0] + s->w * s->h;
277 s->input_frame->data[2] = s->input_frame->data[1] + ((s->w * s->h) >> 2);
278 s->input_frame->linesize[1] = s->input_frame->linesize[2] = s->w >> 1;
279 }
280
281 s->min_strips = s->min_min_strips;
282 s->max_strips = s->max_max_strips;
283
284#ifdef CINEPAKENC_DEBUG
285 s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
286#endif
287
288 return 0;
289
290enomem:
291 av_frame_free(&s->last_frame);
292 av_frame_free(&s->best_frame);
293 av_frame_free(&s->scratch_frame);
294 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
295 av_frame_free(&s->input_frame);
296 av_freep(&s->codebook_input);
297 av_freep(&s->codebook_closest);
298 av_freep(&s->strip_buf);
299 av_freep(&s->frame_buf);
300 av_freep(&s->mb);
301#ifdef CINEPAKENC_DEBUG
302 av_freep(&s->best_mb);
303#endif
304
305 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
306 av_freep(&s->pict_bufs[x]);
307
308 return AVERROR(ENOMEM);
309}
310
311static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
312#ifdef CINEPAK_REPORT_SERR
313, int64_t *serr
314#endif
315)
316{
317 //score = FF_LAMBDA_SCALE * error + lambda * bits
318 int x;
319 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
320 int mb_count = s->w * h / MB_AREA;
321 mb_info *mb;
322 int64_t score1, score2, score3;
323 int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
324 (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
325 CHUNK_HEADER_SIZE) << 3;
326
327 //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
328
329#ifdef CINEPAK_REPORT_SERR
330 *serr = 0;
331#endif
332
333 switch(info->mode) {
334 case MODE_V1_ONLY:
335 //one byte per MB
336 ret += s->lambda * 8 * mb_count;
337
338// while calculating we assume all blocks are ENC_V1
339 for(x = 0; x < mb_count; x++) {
340 mb = &s->mb[x];
341 ret += FF_LAMBDA_SCALE * mb->v1_error;
342#ifdef CINEPAK_REPORT_SERR
343 *serr += mb->v1_error;
344#endif
345// this function is never called for report in MODE_V1_ONLY
346// if(!report)
347 mb->best_encoding = ENC_V1;
348 }
349
350 break;
351 case MODE_V1_V4:
352 //9 or 33 bits per MB
353 if(report) {
354// no moves between the corresponding training sets are allowed
355 *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
356 for(x = 0; x < mb_count; x++) {
357 int mberr;
358 mb = &s->mb[x];
359 if(mb->best_encoding == ENC_V1)
360 score1 = s->lambda * 9 + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
361 else
362 score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
363 ret += score1;
364#ifdef CINEPAK_REPORT_SERR
365 *serr += mberr;
366#endif
367 }
368 } else { // find best mode per block
369 for(x = 0; x < mb_count; x++) {
370 mb = &s->mb[x];
371 score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error;
372 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
373
374 if(score1 <= score2) {
375 ret += score1;
376#ifdef CINEPAK_REPORT_SERR
377 *serr += mb->v1_error;
378#endif
379 mb->best_encoding = ENC_V1;
380 } else {
381 ret += score2;
382#ifdef CINEPAK_REPORT_SERR
383 *serr += mb->v4_error;
384#endif
385 mb->best_encoding = ENC_V4;
386 }
387 }
388 }
389
390 break;
391 case MODE_MC:
392 //1, 10 or 34 bits per MB
393 if(report) {
394 int v1_shrunk = 0, v4_shrunk = 0;
395 for(x = 0; x < mb_count; x++) {
396 mb = &s->mb[x];
397// it is OK to move blocks to ENC_SKIP here
398// but not to any codebook encoding!
399 score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
400 if(mb->best_encoding == ENC_SKIP) {
401 ret += score1;
402#ifdef CINEPAK_REPORT_SERR
403 *serr += mb->skip_error;
404#endif
405 } else if(mb->best_encoding == ENC_V1) {
406 if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
407 mb->best_encoding = ENC_SKIP;
408 ++v1_shrunk;
409 ret += score1;
410#ifdef CINEPAK_REPORT_SERR
411 *serr += mb->skip_error;
412#endif
413 } else {
414 ret += score2;
415#ifdef CINEPAK_REPORT_SERR
416 *serr += mb->v1_error;
417#endif
418 }
419 } else {
420 if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
421 mb->best_encoding = ENC_SKIP;
422 ++v4_shrunk;
423 ret += score1;
424#ifdef CINEPAK_REPORT_SERR
425 *serr += mb->skip_error;
426#endif
427 } else {
428 ret += score3;
429#ifdef CINEPAK_REPORT_SERR
430 *serr += mb->v4_error;
431#endif
432 }
433 }
434 }
435 *training_set_v1_shrunk = v1_shrunk;
436 *training_set_v4_shrunk = v4_shrunk;
437 } else { // find best mode per block
438 for(x = 0; x < mb_count; x++) {
439 mb = &s->mb[x];
440 score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error;
441 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
442 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
443
444 if(score1 <= score2 && score1 <= score3) {
445 ret += score1;
446#ifdef CINEPAK_REPORT_SERR
447 *serr += mb->skip_error;
448#endif
449 mb->best_encoding = ENC_SKIP;
450 } else if(score2 <= score3) {
451 ret += score2;
452#ifdef CINEPAK_REPORT_SERR
453 *serr += mb->v1_error;
454#endif
455 mb->best_encoding = ENC_V1;
456 } else {
457 ret += score3;
458#ifdef CINEPAK_REPORT_SERR
459 *serr += mb->v4_error;
460#endif
461 mb->best_encoding = ENC_V4;
462 }
463 }
464 }
465
466 break;
467 }
468
469 return ret;
470}
471
472static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
473{
474 buf[0] = chunk_type;
475 AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
476 return CHUNK_HEADER_SIZE;
477}
478
479static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
480{
481 int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
482 int incremental_codebook_replacement_mode = 0; // hardcoded here,
483 // the compiler should notice that this is a constant -- rl
484
485 ret = write_chunk_header(buf,
486 s->pix_fmt == AV_PIX_FMT_RGB24 ?
487 chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
488 chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
489 entry_size * size
490 + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
491
492// we do codebook encoding according to the "intra" mode
493// but we keep the "dead" code for reference in case we will want
494// to use incremental codebook updates (which actually would give us
495// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
496// (of course, the code will be not useful as-is)
497 if(incremental_codebook_replacement_mode) {
498 int flags = 0;
499 int flagsind;
500 for(x = 0; x < size; x++) {
501 if(flags == 0) {
502 flagsind = ret;
503 ret += 4;
504 flags = 0x80000000;
505 } else
506 flags = ((flags>>1) | 0x80000000);
507 for(y = 0; y < entry_size; y++)
508 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
509 if((flags&0xffffffff) == 0xffffffff) {
510 AV_WB32(&buf[flagsind], flags);
511 flags = 0;
512 }
513 }
514 if(flags)
515 AV_WB32(&buf[flagsind], flags);
516 } else
517 for(x = 0; x < size; x++)
518 for(y = 0; y < entry_size; y++)
519 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
520
521 return ret;
522}
523
524//sets out to the sub picture starting at (x,y) in in
525static void get_sub_picture(CinepakEncContext *s, int x, int y,
526 uint8_t * in_data[4], int in_linesize[4],
527 uint8_t *out_data[4], int out_linesize[4])
528{
529 out_data[0] = in_data[0] + x + y * in_linesize[0];
530 out_linesize[0] = in_linesize[0];
531
532 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
533 out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
534 out_linesize[1] = in_linesize[1];
535
536 out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
537 out_linesize[2] = in_linesize[2];
538 }
539}
540
541//decodes the V1 vector in mb into the 4x4 MB pointed to by data
542static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
543 int linesize[4], int v1_vector, strip_info *info)
544{
545 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546
547 data[0][0] =
548 data[0][1] =
549 data[0][ linesize[0]] =
550 data[0][1+ linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551
552 data[0][2] =
553 data[0][3] =
554 data[0][2+ linesize[0]] =
555 data[0][3+ linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556
557 data[0][2*linesize[0]] =
558 data[0][1+2*linesize[0]] =
559 data[0][ 3*linesize[0]] =
560 data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561
562 data[0][2+2*linesize[0]] =
563 data[0][3+2*linesize[0]] =
564 data[0][2+3*linesize[0]] =
565 data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566
567 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568 data[1][0] =
569 data[1][1] =
570 data[1][ linesize[1]] =
571 data[1][1+ linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572
573 data[2][0] =
574 data[2][1] =
575 data[2][ linesize[2]] =
576 data[2][1+ linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577 }
578}
579
580//decodes the V4 vectors in mb into the 4x4 MB pointed to by data
581static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
582 int linesize[4], int *v4_vector, strip_info *info)
583{
584 int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
585
586 for(i = y = 0; y < 4; y += 2) {
587 for(x = 0; x < 4; x += 2, i++) {
588 data[0][x + y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
589 data[0][x+1 + y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
590 data[0][x + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
591 data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
592
593 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
594 data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
595 data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
596 }
597 }
598 }
599}
600
601static void copy_mb(CinepakEncContext *s,
602 uint8_t *a_data[4], int a_linesize[4],
603 uint8_t *b_data[4], int b_linesize[4])
604{
605 int y, p;
606
607 for(y = 0; y < MB_SIZE; y++) {
608 memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
609 MB_SIZE);
610 }
611
612 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
613 for(p = 1; p <= 2; p++) {
614 for(y = 0; y < MB_SIZE/2; y++) {
615 memcpy(a_data[p] + y*a_linesize[p],
616 b_data[p] + y*b_linesize[p],
617 MB_SIZE/2);
618 }
619 }
620 }
621}
622
623static int encode_mode(CinepakEncContext *s, int h,
624 uint8_t *scratch_data[4], int scratch_linesize[4],
625 uint8_t *last_data[4], int last_linesize[4],
626 strip_info *info, unsigned char *buf)
627{
628 int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
629 int needs_extra_bit, should_write_temp;
630 unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
631 mb_info *mb;
632 uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
633 int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
634
635 //encode codebooks
636////// MacOS vintage decoder compatibility dictates the presence of
637////// the codebook chunk even when the codebook is empty - pretty dumb...
638////// and also the certain order of the codebook chunks -- rl
639 if(info->v4_size || !s->skip_empty_cb)
640 ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
641
642 if(info->v1_size || !s->skip_empty_cb)
643 ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
644
645 //update scratch picture
646 for(z = y = 0; y < h; y += MB_SIZE) {
647 for(x = 0; x < s->w; x += MB_SIZE, z++) {
648 mb = &s->mb[z];
649
650 get_sub_picture(s, x, y, scratch_data, scratch_linesize,
651 sub_scratch_data, sub_scratch_linesize);
652
653 if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
654 get_sub_picture(s, x, y,
655 last_data, last_linesize,
656 sub_last_data, sub_last_linesize);
657 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
658 sub_last_data, sub_last_linesize);
659 } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
660 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
661 mb->v1_vector, info);
662 else
663 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
664 mb->v4_vector, info);
665 }
666 }
667
668 switch(info->mode) {
669 case MODE_V1_ONLY:
670 //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
671 ret += write_chunk_header(buf + ret, 0x32, mb_count);
672
673 for(x = 0; x < mb_count; x++)
674 buf[ret++] = s->mb[x].v1_vector;
675
676 break;
677 case MODE_V1_V4:
678 //remember header position
679 header_ofs = ret;
680 ret += CHUNK_HEADER_SIZE;
681
682 for(x = 0; x < mb_count; x += 32) {
683 flags = 0;
684 for(y = x; y < FFMIN(x+32, mb_count); y++)
685 if(s->mb[y].best_encoding == ENC_V4)
686 flags |= 1 << (31 - y + x);
687
688 AV_WB32(&buf[ret], flags);
689 ret += 4;
690
691 for(y = x; y < FFMIN(x+32, mb_count); y++) {
692 mb = &s->mb[y];
693
694 if(mb->best_encoding == ENC_V1)
695 buf[ret++] = mb->v1_vector;
696 else
697 for(z = 0; z < 4; z++)
698 buf[ret++] = mb->v4_vector[z];
699 }
700 }
701
702 write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
703
704 break;
705 case MODE_MC:
706 //remember header position
707 header_ofs = ret;
708 ret += CHUNK_HEADER_SIZE;
709 flags = bits = temp_size = 0;
710
711 for(x = 0; x < mb_count; x++) {
712 mb = &s->mb[x];
713 flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
714 needs_extra_bit = 0;
715 should_write_temp = 0;
716
717 if(mb->best_encoding != ENC_SKIP) {
718 if(bits < 32)
719 flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
720 else
721 needs_extra_bit = 1;
722 }
723
724 if(bits == 32) {
725 AV_WB32(&buf[ret], flags);
726 ret += 4;
727 flags = bits = 0;
728
729 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
730 memcpy(&buf[ret], temp, temp_size);
731 ret += temp_size;
732 temp_size = 0;
733 } else
734 should_write_temp = 1;
735 }
736
737 if(needs_extra_bit) {
738 flags = (mb->best_encoding == ENC_V4) << 31;
739 bits = 1;
740 }
741
742 if(mb->best_encoding == ENC_V1)
743 temp[temp_size++] = mb->v1_vector;
744 else if(mb->best_encoding == ENC_V4)
745 for(z = 0; z < 4; z++)
746 temp[temp_size++] = mb->v4_vector[z];
747
748 if(should_write_temp) {
749 memcpy(&buf[ret], temp, temp_size);
750 ret += temp_size;
751 temp_size = 0;
752 }
753 }
754
755 if(bits > 0) {
756 AV_WB32(&buf[ret], flags);
757 ret += 4;
758 memcpy(&buf[ret], temp, temp_size);
759 ret += temp_size;
760 }
761
762 write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
763
764 break;
765 }
766
767 return ret;
768}
769
770//computes distortion of 4x4 MB in b compared to a
771static int compute_mb_distortion(CinepakEncContext *s,
772 uint8_t *a_data[4], int a_linesize[4],
773 uint8_t *b_data[4], int b_linesize[4])
774{
775 int x, y, p, d, ret = 0;
776
777 for(y = 0; y < MB_SIZE; y++) {
778 for(x = 0; x < MB_SIZE; x++) {
779 d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
780 ret += d*d;
781 }
782 }
783
784 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
785 for(p = 1; p <= 2; p++) {
786 for(y = 0; y < MB_SIZE/2; y++) {
787 for(x = 0; x < MB_SIZE/2; x++) {
788 d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
789 ret += d*d;
790 }
791 }
792 }
793 }
794
795 return ret;
796}
797
798// return the possibly adjusted size of the codebook
799#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
800static int quantize(CinepakEncContext *s, int h,
801 uint8_t *data[4], int linesize[4],
802 int v1mode, strip_info *info,
803 mb_encoding encoding)
804{
805 int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
806 int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
807 int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
808 int size = v1mode ? info->v1_size : info->v4_size;
809 int64_t total_error = 0;
810 uint8_t vq_pict_buf[(MB_AREA*3)/2];
811 uint8_t *sub_data [4], *vq_data [4];
812 int sub_linesize[4], vq_linesize[4];
813
814 for(mbn = i = y = 0; y < h; y += MB_SIZE) {
815 for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
816 int *base;
817
818 if(CERTAIN(encoding)) {
819// use for the training only the blocks known to be to be encoded [sic:-]
820 if(s->mb[mbn].best_encoding != encoding) continue;
821 }
822
823 base = s->codebook_input + i*entry_size;
824 if(v1mode) {
825 //subsample
826 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
827 for(x2 = 0; x2 < 4; x2 += 2, j++) {
828 plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
829 shift = y2 < 4 ? 0 : 1;
830 x3 = shift ? 0 : x2;
831 y3 = shift ? 0 : y2;
832 base[j] = (data[plane][((x+x3) >> shift) + ((y+y3) >> shift) * linesize[plane]] +
833 data[plane][((x+x3) >> shift) + 1 + ((y+y3) >> shift) * linesize[plane]] +
834 data[plane][((x+x3) >> shift) + (((y+y3) >> shift) + 1) * linesize[plane]] +
835 data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
836 }
837 }
838 } else {
839 //copy
840 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
841 for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
842 for(k = 0; k < entry_size; k++, j++) {
843 plane = k >= 4 ? k - 3 : 0;
844
845 if(k >= 4) {
846 x3 = (x+x2) >> 1;
847 y3 = (y+y2) >> 1;
848 } else {
849 x3 = x + x2 + (k & 1);
850 y3 = y + y2 + (k >> 1);
851 }
852
853 base[j] = data[plane][x3 + y3*linesize[plane]];
854 }
855 }
856 }
857 }
858 i += v1mode ? 1 : 4;
859 }
860 }
861// if(i < mbn*(v1mode ? 1 : 4)) {
862// av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
863// }
864
865 if(i == 0) // empty training set, nothing to do
866 return 0;
867 if(i < size) {
868 //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
869 size = i;
870 }
871
872 avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
873 avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
874
875 //setup vq_data, which contains a single MB
876 vq_data[0] = vq_pict_buf;
877 vq_linesize[0] = MB_SIZE;
878 vq_data[1] = &vq_pict_buf[MB_AREA];
879 vq_data[2] = vq_data[1] + (MB_AREA >> 2);
880 vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
881
882 //copy indices
883 for(i = j = y = 0; y < h; y += MB_SIZE) {
884 for(x = 0; x < s->w; x += MB_SIZE, j++) {
885 mb_info *mb = &s->mb[j];
886// skip uninteresting blocks if we know their preferred encoding
887 if(CERTAIN(encoding) && mb->best_encoding != encoding)
888 continue;
889
890 //point sub_data to current MB
891 get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
892
893 if(v1mode) {
894 mb->v1_vector = s->codebook_closest[i];
895
896 //fill in vq_data with V1 data
897 decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
898
899 mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
900 vq_data, vq_linesize);
901 total_error += mb->v1_error;
902 } else {
903 for(k = 0; k < 4; k++)
904 mb->v4_vector[k] = s->codebook_closest[i+k];
905
906 //fill in vq_data with V4 data
907 decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
908
909 mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
910 vq_data, vq_linesize);
911 total_error += mb->v4_error;
912 }
913 i += v1mode ? 1 : 4;
914 }
915 }
916// check that we did it right in the beginning of the function
917 av_assert0(i >= size); // training set is no smaller than the codebook
918
919 //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
920
921 return size;
922}
923
924static void calculate_skip_errors(CinepakEncContext *s, int h,
925 uint8_t *last_data[4], int last_linesize[4],
926 uint8_t *data[4], int linesize[4],
927 strip_info *info)
928{
929 int x, y, i;
930 uint8_t *sub_last_data [4], *sub_pict_data [4];
931 int sub_last_linesize[4], sub_pict_linesize[4];
932
933 for(i = y = 0; y < h; y += MB_SIZE) {
934 for(x = 0; x < s->w; x += MB_SIZE, i++) {
935 get_sub_picture(s, x, y, last_data, last_linesize,
936 sub_last_data, sub_last_linesize);
937 get_sub_picture(s, x, y, data, linesize,
938 sub_pict_data, sub_pict_linesize);
939
940 s->mb[i].skip_error = compute_mb_distortion(s,
941 sub_last_data, sub_last_linesize,
942 sub_pict_data, sub_pict_linesize);
943 }
944 }
945}
946
947static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
948{
949// actually we are exclusively using intra strip coding (how much can we win
950// otherwise? how to choose which part of a codebook to update?),
951// keyframes are different only because we disallow ENC_SKIP on them -- rl
952// (besides, the logic here used to be inverted: )
953// buf[0] = keyframe ? 0x11: 0x10;
954 buf[0] = keyframe ? 0x10: 0x11;
955 AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
956// AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
957 AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
958 AV_WB16(&buf[6], 0);
959// AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
960 AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
961 AV_WB16(&buf[10], s->w);
962 //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
963}
964
965static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
966 uint8_t *last_data[4], int last_linesize[4],
967 uint8_t *data[4], int linesize[4],
968 uint8_t *scratch_data[4], int scratch_linesize[4],
969 unsigned char *buf, int64_t *best_score
970#ifdef CINEPAK_REPORT_SERR
971, int64_t *best_serr
972#endif
973)
974{
975 int64_t score = 0;
976#ifdef CINEPAK_REPORT_SERR
977 int64_t serr;
978#endif
979 int best_size = 0;
980 strip_info info;
981// for codebook optimization:
982 int v1enough, v1_size, v4enough, v4_size;
983 int new_v1_size, new_v4_size;
984 int v1shrunk, v4shrunk;
985
986 if(!keyframe)
987 calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
988 &info);
989
990 //try some powers of 4 for the size of the codebooks
991 //constraint the v4 codebook to be no bigger than v1 one,
992 //(and no less than v1_size/4)
993 //thus making v1 preferable and possibly losing small details? should be ok
994#define SMALLEST_CODEBOOK 1
995 for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
996 for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
997 //try all modes
998 for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
999 //don't allow MODE_MC in intra frames
1000 if(keyframe && mode == MODE_MC)
1001 continue;
1002
1003 if(mode == MODE_V1_ONLY) {
1004 info.v1_size = v1_size;
1005// the size may shrink even before optimizations if the input is short:
1006 info.v1_size = quantize(s, h, data, linesize, 1,
1007 &info, ENC_UNCERTAIN);
1008 if(info.v1_size < v1_size)
1009// too few eligible blocks, no sense in trying bigger sizes
1010 v1enough = 1;
1011
1012 info.v4_size = 0;
1013 } else { // mode != MODE_V1_ONLY
1014 // if v4 codebook is empty then only allow V1-only mode
1015 if(!v4_size)
1016 continue;
1017
1018 if(mode == MODE_V1_V4) {
1019 info.v4_size = v4_size;
1020 info.v4_size = quantize(s, h, data, linesize, 0,
1021 &info, ENC_UNCERTAIN);
1022 if(info.v4_size < v4_size)
1023// too few eligible blocks, no sense in trying bigger sizes
1024 v4enough = 1;
1025 }
1026 }
1027
1028 info.mode = mode;
1029// choose the best encoding per block, based on current experience
1030 score = calculate_mode_score(s, h, &info, 0,
1031 &v1shrunk, &v4shrunk
1032#ifdef CINEPAK_REPORT_SERR
1033, &serr
1034#endif
1035);
1036
1037 if(mode != MODE_V1_ONLY){
1038 int extra_iterations_limit = s->max_extra_cb_iterations;
1039// recompute the codebooks, omitting the extra blocks
1040// we assume we _may_ come here with more blocks to encode than before
1041 info.v1_size = v1_size;
1042 new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1043 if(new_v1_size < info.v1_size){
1044 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1045 info.v1_size = new_v1_size;
1046 }
1047// we assume we _may_ come here with more blocks to encode than before
1048 info.v4_size = v4_size;
1049 new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1050 if(new_v4_size < info.v4_size) {
1051 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1052 info.v4_size = new_v4_size;
1053 }
1054// calculate the resulting score
1055// (do not move blocks to codebook encodings now, as some blocks may have
1056// got bigger errors despite a smaller training set - but we do not
1057// ever grow the training sets back)
1058 for(;;) {
1059 score = calculate_mode_score(s, h, &info, 1,
1060 &v1shrunk, &v4shrunk
1061#ifdef CINEPAK_REPORT_SERR
1062, &serr
1063#endif
1064);
1065// do we have a reason to reiterate? if so, have we reached the limit?
1066 if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1067// recompute the codebooks, omitting the extra blocks
1068 if(v1shrunk) {
1069 info.v1_size = v1_size;
1070 new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1071 if(new_v1_size < info.v1_size){
1072 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1073 info.v1_size = new_v1_size;
1074 }
1075 }
1076 if(v4shrunk) {
1077 info.v4_size = v4_size;
1078 new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1079 if(new_v4_size < info.v4_size) {
1080 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1081 info.v4_size = new_v4_size;
1082 }
1083 }
1084 }
1085 }
1086
1087 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
1088
1089 if(best_size == 0 || score < *best_score) {
1090
1091 *best_score = score;
1092#ifdef CINEPAK_REPORT_SERR
1093 *best_serr = serr;
1094#endif
1095 best_size = encode_mode(s, h,
1096 scratch_data, scratch_linesize,
1097 last_data, last_linesize, &info,
1098 s->strip_buf + STRIP_HEADER_SIZE);
1099
1100 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
1101 //av_log(s->avctx, AV_LOG_INFO, "\n");
1102#ifdef CINEPAK_REPORT_SERR
1103 av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
1104#endif
1105
1106#ifdef CINEPAKENC_DEBUG
1107 //save MB encoding choices
1108 memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1109#endif
1110
1111 //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1112 write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1113
1114 }
1115 }
1116 }
1117 }
1118
1119#ifdef CINEPAKENC_DEBUG
1120 //gather stats. this will only work properly of MAX_STRIPS == 1
1121 if(best_info.mode == MODE_V1_ONLY) {
1122 s->num_v1_mode++;
1123 s->num_v1_encs += s->w*h/MB_AREA;
1124 } else {
1125 if(best_info.mode == MODE_V1_V4)
1126 s->num_v4_mode++;
1127 else
1128 s->num_mc_mode++;
1129
1130 int x;
1131 for(x = 0; x < s->w*h/MB_AREA; x++)
1132 if(s->best_mb[x].best_encoding == ENC_V1)
1133 s->num_v1_encs++;
1134 else if(s->best_mb[x].best_encoding == ENC_V4)
1135 s->num_v4_encs++;
1136 else
1137 s->num_skips++;
1138 }
1139#endif
1140
1141 best_size += STRIP_HEADER_SIZE;
1142 memcpy(buf, s->strip_buf, best_size);
1143
1144 return best_size;
1145}
1146
1147static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1148{
1149 buf[0] = isakeyframe ? 0 : 1;
1150 AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1151 AV_WB16(&buf[4], s->w);
1152 AV_WB16(&buf[6], s->h);
1153 AV_WB16(&buf[8], num_strips);
1154
1155 return CVID_HEADER_SIZE;
1156}
1157
1158static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
1159 int isakeyframe, unsigned char *buf, int buf_size)
1160{
1161 int num_strips, strip, i, y, nexty, size, temp_size;
1162 uint8_t *last_data [4], *data [4], *scratch_data [4];
1163 int last_linesize[4], linesize[4], scratch_linesize[4];
1164 int64_t best_score = 0, score, score_temp;
1165#ifdef CINEPAK_REPORT_SERR
1166 int64_t best_serr = 0, serr, serr_temp;
1167#endif
1168
1169 int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1170
1171 if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1172 int x;
1173// build a copy of the given frame in the correct colorspace
1174 for(y = 0; y < s->h; y += 2) {
1175 for(x = 0; x < s->w; x += 2) {
1176 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1177 ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
1178 ir[1] = ir[0] + frame->linesize[0];
1179 get_sub_picture(s, x, y,
1180 s->input_frame->data, s->input_frame->linesize,
1181 scratch_data, scratch_linesize);
1182 r = g = b = 0;
1183 for(i=0; i<4; ++i) {
1184 int i1, i2;
1185 i1 = (i&1); i2 = (i>=2);
1186 rr = ir[i2][i1*3+0];
1187 gg = ir[i2][i1*3+1];
1188 bb = ir[i2][i1*3+2];
1189 r += rr; g += gg; b += bb;
1190// using fixed point arithmetic for portable repeatability, scaling by 2^23
1191// "Y"
1192// rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1193 rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1194 if( rr < 0) rr = 0;
1195 else if (rr > 255) rr = 255;
1196 scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
1197 }
1198// let us scale down as late as possible
1199// r /= 4; g /= 4; b /= 4;
1200// "U"
1201// rr = -0.1429*r - 0.2857*g + 0.4286*b;
1202 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1203 if( rr < -128) rr = -128;
1204 else if (rr > 127) rr = 127;
1205 scratch_data[1][0] = rr + 128; // quantize needs unsigned
1206// "V"
1207// rr = 0.3571*r - 0.2857*g - 0.0714*b;
1208 rr = (748893*r - 599156*g - 149737*b) >> 23;
1209 if( rr < -128) rr = -128;
1210 else if (rr > 127) rr = 127;
1211 scratch_data[2][0] = rr + 128; // quantize needs unsigned
1212 }
1213 }
1214 }
1215
1216 //would be nice but quite certainly incompatible with vintage players:
1217 // support encoding zero strips (meaning skip the whole frame)
1218 for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1219 score = 0;
1220 size = 0;
1221#ifdef CINEPAK_REPORT_SERR
1222 serr = 0;
1223#endif
1224
1225 for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1226 int strip_height;
1227
1228 nexty = strip * s->h / num_strips; // <= s->h
1229 //make nexty the next multiple of 4 if not already there
1230 if(nexty & 3)
1231 nexty += 4 - (nexty & 3);
1232
1233 strip_height = nexty - y;
1234 if(strip_height <= 0) { // can this ever happen?
1235 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1236 continue;
1237 }
1238
1239 if(s->pix_fmt == AV_PIX_FMT_RGB24)
1240 get_sub_picture(s, 0, y,
1241 s->input_frame->data, s->input_frame->linesize,
1242 data, linesize);
1243 else
1244 get_sub_picture(s, 0, y,
1245 (uint8_t **)frame->data, (int*)frame->linesize,
1246 data, linesize);
1247 get_sub_picture(s, 0, y,
1248 s->last_frame->data, s->last_frame->linesize,
1249 last_data, last_linesize);
1250 get_sub_picture(s, 0, y,
1251 s->scratch_frame->data, s->scratch_frame->linesize,
1252 scratch_data, scratch_linesize);
1253
1254 if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
1255 last_data, last_linesize, data, linesize,
1256 scratch_data, scratch_linesize,
1257 s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1258#ifdef CINEPAK_REPORT_SERR
1259, &serr_temp
1260#endif
1261)) < 0)
1262 return temp_size;
1263
1264 score += score_temp;
1265#ifdef CINEPAK_REPORT_SERR
1266 serr += serr_temp;
1267#endif
1268 size += temp_size;
1269 //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1270 //av_log(s->avctx, AV_LOG_INFO, "\n");
1271 }
1272
1273 if(best_score == 0 || score < best_score) {
1274 best_score = score;
1275#ifdef CINEPAK_REPORT_SERR
1276 best_serr = serr;
1277#endif
1278 best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1279 //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
1280#ifdef CINEPAK_REPORT_SERR
1281 av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
1282#endif
1283
1284 FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1285 memcpy(buf, s->frame_buf, best_size);
1286 best_nstrips = num_strips;
1287 }
1288// avoid trying too many strip numbers without a real reason
1289// (this makes the processing of the very first frame faster)
1290 if(num_strips - best_nstrips > 4)
1291 break;
1292 }
1293
1294 av_assert0(best_nstrips >= 0 && best_size >= 0);
1295
1296// let the number of strips slowly adapt to the changes in the contents,
1297// compared to full bruteforcing every time this will occasionally lead
1298// to some r/d performance loss but makes encoding up to several times faster
1299 if(!s->strip_number_delta_range) {
1300 if(best_nstrips == s->max_strips) { // let us try to step up
1301 s->max_strips = best_nstrips + 1;
1302 if(s->max_strips >= s->max_max_strips)
1303 s->max_strips = s->max_max_strips;
1304 } else { // try to step down
1305 s->max_strips = best_nstrips;
1306 }
1307 s->min_strips = s->max_strips - 1;
1308 if(s->min_strips < s->min_min_strips)
1309 s->min_strips = s->min_min_strips;
1310 } else {
1311 s->max_strips = best_nstrips + s->strip_number_delta_range;
1312 if(s->max_strips >= s->max_max_strips)
1313 s->max_strips = s->max_max_strips;
1314 s->min_strips = best_nstrips - s->strip_number_delta_range;
1315 if(s->min_strips < s->min_min_strips)
1316 s->min_strips = s->min_min_strips;
1317 }
1318
1319 return best_size;
1320}
1321
1322static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1323 const AVFrame *frame, int *got_packet)
1324{
1325 CinepakEncContext *s = avctx->priv_data;
1326 int ret;
1327
1328 s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1329
1330 if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1331 return ret;
1332 ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1333 pkt->size = ret;
1334 if (s->curframe == 0)
1335 pkt->flags |= AV_PKT_FLAG_KEY;
1336 *got_packet = 1;
1337
1338 FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1339
1340 if (++s->curframe >= s->keyint)
1341 s->curframe = 0;
1342
1343 return 0;
1344}
1345
1346static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1347{
1348 CinepakEncContext *s = avctx->priv_data;
1349 int x;
1350
1351 av_frame_free(&s->last_frame);
1352 av_frame_free(&s->best_frame);
1353 av_frame_free(&s->scratch_frame);
1354 if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1355 av_frame_free(&s->input_frame);
1356 av_freep(&s->codebook_input);
1357 av_freep(&s->codebook_closest);
1358 av_freep(&s->strip_buf);
1359 av_freep(&s->frame_buf);
1360 av_freep(&s->mb);
1361#ifdef CINEPAKENC_DEBUG
1362 av_freep(&s->best_mb);
1363#endif
1364
1365 for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1366 av_freep(&s->pict_bufs[x]);
1367
1368#ifdef CINEPAKENC_DEBUG
1369 av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1370 s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1371#endif
1372
1373 return 0;
1374}
1375
1376AVCodec ff_cinepak_encoder = {
1377 .name = "cinepak",
1378 .type = AVMEDIA_TYPE_VIDEO,
1379 .id = AV_CODEC_ID_CINEPAK,
1380 .priv_data_size = sizeof(CinepakEncContext),
1381 .init = cinepak_encode_init,
1382 .encode2 = cinepak_encode_frame,
1383 .close = cinepak_encode_end,
1384 .pix_fmts = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1385 .long_name = NULL_IF_CONFIG_SMALL("Cinepak"),
1386 .priv_class = &cinepak_class,
1387};
1388