platform/external/ffmpeg.git - Unnamed repository; edit this file 'description' to name the repository.

1 /*
2  * Cinepak encoder (c) 2011 Tomas Härdin
3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
4  *
5  * Fixes and improvements, vintage decoders compatibility
6  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the "Software"),
10 to deal in the Software without restriction, including without limitation
11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 and/or sell copies of the Software, and to permit persons to whom the
13 Software is furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 OTHER DEALINGS IN THE SOFTWARE.
25
26  * MAYBE:
27  * - "optimally" split the frame into several non-regular areas
28  *   using a separate codebook pair for each area and approximating
29  *   the area by several rectangular strips (generally not full width ones)
30  *   (use quadtree splitting? a simple fixed-granularity grid?)
31  *
32  *
33  * version 2014-01-23 Rl
34  * - added option handling for flexibility
35  *
36  * version 2014-01-21 Rl
37  * - believe it or not, now we get even smaller files, with better quality
38  *   (which means I missed an optimization earlier :)
39  *
40  * version 2014-01-20 Rl
41  * - made the encoder compatible with vintage decoders
42  *   and added some yet unused code for possible future
43  *   incremental codebook updates
44  * - fixed a small memory leak
45  *
46  * version 2013-04-28 Rl
47  * - bugfixed codebook optimization logic
48  *
49  * version 2013-02-14 Rl
50  * "Valentine's Day" version:
51  * - made strip division more robust
52  * - minimized bruteforcing the number of strips,
53  *   (costs some R/D but speeds up compression a lot), the heuristic
54  *   assumption is that score as a function of the number of strips has
55  *   one wide minimum which moves slowly, of course not fully true
56  * - simplified codebook generation,
57  *   the old code was meant for other optimizations than we actually do
58  * - optimized the codebook generation / error estimation for MODE_MC
59  *
60  * version 2013-02-12 Rl
61  * - separated codebook training sets, avoided the transfer of wasted bytes,
62  *   which yields both better quality and smaller files
63  * - now using the correct colorspace (TODO: move conversion to libswscale)
64  *
65  * version 2013-02-08 Rl
66  * - fixes/optimization in multistrip encoding and codebook size choice,
67  *   quality/bitrate is now better than that of the binary proprietary encoder
68  */
69
70 #include "libavutil/intreadwrite.h"
71 #include "avcodec.h"
72 #include "libavutil/lfg.h"
73 #include "elbg.h"
74 #include "internal.h"
75
76 #include "libavutil/avassert.h"
77 #include "libavutil/opt.h"
78
/* Sizes in bytes of the fixed headers used when sizing and writing output. */
#define CVID_HEADER_SIZE   10
#define STRIP_HEADER_SIZE  12
#define CHUNK_HEADER_SIZE  4

/* Macroblocks are MB_SIZE x MB_SIZE pixels, i.e. 4x4. */
#define MB_SIZE  4
#define MB_AREA  (MB_SIZE * MB_SIZE)

/* A codebook vector has six or four entries depending on the format. */
#define VECTOR_MAX    6
/* Number of entries in one codebook. */
#define CODEBOOK_MAX  256

/*
 * Bounds on the number of strips per frame.
 *
 * Having fewer choices regarding the number of strips speeds up encoding
 * (obviously); having more strips speeds up encoding the frame (this is
 * less obvious).
 *
 * MAX_STRIPS limits the maximum quality you can reach when you want high
 * quality on high resolutions; MIN_STRIPS limits the minimum efficiently
 * encodable bit rate on low resolutions.
 *
 * The numbers are only used for brute force optimization for the first
 * frame; for the following frames they are adaptively readjusted.
 *
 * NOTE: the decoder in ffmpeg has its own arbitrary limitation on the
 * number of strips, currently 32.
 */
#define MAX_STRIPS  32
#define MIN_STRIPS  1
99
/*
 * Coding mode of a strip.
 * MODE_COUNT is not a mode; it equals the number of modes listed before it.
 */
typedef enum {
    MODE_V1_ONLY = 0,   /* every MB is coded with a V1 vector */
    MODE_V1_V4   = 1,   /* each MB is coded with either V1 or V4 vectors */
    MODE_MC      = 2,   /* like MODE_V1_V4, but an MB may also be skipped */

    MODE_COUNT   = 3,
} CinepakMode;
107
/* How a single macroblock is (or will be) encoded. */
typedef enum {
    ENC_V1        = 0,  /* one index into the V1 codebook */
    ENC_V4        = 1,  /* four indices into the V4 codebook */
    ENC_SKIP      = 2,  /* block is skipped (copied from the last frame) */

    ENC_UNCERTAIN = 3   /* no encoding decided; see CERTAIN() */
} mb_encoding;
115
116 typedef struct {
117     int v1_vector;                  //index into v1 codebook
118     int v1_error;                   //error when using V1 encoding
119     int v4_vector[4];               //indices into v4 codebook
120     int v4_error;                   //error when using V4 encoding
121     int skip_error;                 //error when block is skipped (aka copied from last frame)
122     mb_encoding best_encoding;      //last result from calculate_mode_score()
123 } mb_info;
124
125 typedef struct {
126     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
127     int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
128     int v1_size;
129     int v4_size;
130     CinepakMode mode;
131 } strip_info;
132
133 typedef struct {
134     const AVClass *class;
135     AVCodecContext *avctx;
136     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
137     AVFrame *last_frame;
138     AVFrame *best_frame;
139     AVFrame *scratch_frame;
140     AVFrame *input_frame;
141     enum AVPixelFormat pix_fmt;
142     int w, h;
143     int frame_buf_size;
144     int curframe, keyint;
145     AVLFG randctx;
146     uint64_t lambda;
147     int *codebook_input;
148     int *codebook_closest;
149     mb_info *mb;                                //MB RD state
150     int min_strips;          //the current limit
151     int max_strips;          //the current limit
152 #ifdef CINEPAKENC_DEBUG
153     mb_info *best_mb;                           //TODO: remove. only used for printing stats
154     int num_v1_mode, num_v4_mode, num_mc_mode;
155     int num_v1_encs, num_v4_encs, num_skips;
156 #endif
157 // options
158     int max_extra_cb_iterations;
159     int skip_empty_cb;
160     int min_min_strips;
161     int max_max_strips;
162     int strip_number_delta_range;
163 } CinepakEncContext;
164
165 #define OFFSET(x) offsetof(CinepakEncContext, x)
166 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
167 static const AVOption options[] = {
168     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
169     { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
170     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
171     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
172     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
173     { NULL },
174 };
175
176 static const AVClass cinepak_class = {
177     .class_name = "cinepak",
178     .item_name  = av_default_item_name,
179     .option     = options,
180     .version    = LIBAVUTIL_VERSION_INT,
181 };
182
183 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
184 {
185     CinepakEncContext *s = avctx->priv_data;
186     int x, mb_count, strip_buf_size, frame_buf_size;
187
188     if (avctx->width & 3 || avctx->height & 3) {
189         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
190                 avctx->width, avctx->height);
191         return AVERROR(EINVAL);
192     }
193
194     if (s->min_min_strips > s->max_max_strips) {
195         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
196                 s->min_min_strips, s->max_max_strips);
197         return AVERROR(EINVAL);
198     }
199
200     if (!(s->last_frame = av_frame_alloc()))
201         return AVERROR(ENOMEM);
202     if (!(s->best_frame = av_frame_alloc()))
203         goto enomem;
204     if (!(s->scratch_frame = av_frame_alloc()))
205         goto enomem;
206     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
207         if (!(s->input_frame = av_frame_alloc()))
208             goto enomem;
209
210     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
211         goto enomem;
212
213     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
214         goto enomem;
215
216     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
217         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
218             goto enomem;
219
220     mb_count = avctx->width * avctx->height / MB_AREA;
221
222     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
223     //and full codebooks being replaced in INTER mode,
224     // which is 34 bits per MB
225     //and 2*256 extra flag bits per strip
226     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
227
228     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
229
230     if (!(s->strip_buf = av_malloc(strip_buf_size)))
231         goto enomem;
232
233     if (!(s->frame_buf = av_malloc(frame_buf_size)))
234         goto enomem;
235
236     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
237         goto enomem;
238
239 #ifdef CINEPAKENC_DEBUG
240     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
241         goto enomem;
242 #endif
243
244     av_lfg_init(&s->randctx, 1);
245     s->avctx = avctx;
246     s->w = avctx->width;
247     s->h = avctx->height;
248     s->frame_buf_size = frame_buf_size;
249     s->curframe = 0;
250     s->keyint = avctx->keyint_min;
251     s->pix_fmt = avctx->pix_fmt;
252
253     //set up AVFrames
254     s->last_frame->data[0]        = s->pict_bufs[0];
255     s->last_frame->linesize[0]    = s->w;
256     s->best_frame->data[0]        = s->pict_bufs[1];
257     s->best_frame->linesize[0]    = s->w;
258     s->scratch_frame->data[0]     = s->pict_bufs[2];
259     s->scratch_frame->linesize[0] = s->w;
260
261     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
262         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
263         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
264         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
265
266         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
267         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
268         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
269
270         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
271         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
272         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
273
274         s->input_frame->data[0]       = s->pict_bufs[3];
275         s->input_frame->linesize[0]   = s->w;
276         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
277         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
278         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
279     }
280
281     s->min_strips = s->min_min_strips;
282     s->max_strips = s->max_max_strips;
283
284 #ifdef CINEPAKENC_DEBUG
285     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
286 #endif
287
288     return 0;
289
290 enomem:
291     av_frame_free(&s->last_frame);
292     av_frame_free(&s->best_frame);
293     av_frame_free(&s->scratch_frame);
294     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
295         av_frame_free(&s->input_frame);
296     av_freep(&s->codebook_input);
297     av_freep(&s->codebook_closest);
298     av_freep(&s->strip_buf);
299     av_freep(&s->frame_buf);
300     av_freep(&s->mb);
301 #ifdef CINEPAKENC_DEBUG
302     av_freep(&s->best_mb);
303 #endif
304
305     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
306         av_freep(&s->pict_bufs[x]);
307
308     return AVERROR(ENOMEM);
309 }
310
311 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
312 #ifdef CINEPAK_REPORT_SERR
313 , int64_t *serr
314 #endif
315 )
316 {
317     //score = FF_LAMBDA_SCALE * error + lambda * bits
318     int x;
319     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
320     int mb_count = s->w * h / MB_AREA;
321     mb_info *mb;
322     int64_t score1, score2, score3;
323     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
324                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
325                    CHUNK_HEADER_SIZE) << 3;
326
327     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
328
329 #ifdef CINEPAK_REPORT_SERR
330     *serr = 0;
331 #endif
332
333     switch(info->mode) {
334     case MODE_V1_ONLY:
335         //one byte per MB
336         ret += s->lambda * 8 * mb_count;
337
338 // while calculating we assume all blocks are ENC_V1
339         for(x = 0; x < mb_count; x++) {
340             mb = &s->mb[x];
341             ret += FF_LAMBDA_SCALE * mb->v1_error;
342 #ifdef CINEPAK_REPORT_SERR
343             *serr += mb->v1_error;
344 #endif
345 // this function is never called for report in MODE_V1_ONLY
346 //            if(!report)
347             mb->best_encoding = ENC_V1;
348         }
349
350         break;
351     case MODE_V1_V4:
352         //9 or 33 bits per MB
353         if(report) {
354 // no moves between the corresponding training sets are allowed
355             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
356             for(x = 0; x < mb_count; x++) {
357                 int mberr;
358                 mb = &s->mb[x];
359                 if(mb->best_encoding == ENC_V1)
360                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
361                 else
362                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
363                 ret += score1;
364 #ifdef CINEPAK_REPORT_SERR
365                 *serr += mberr;
366 #endif
367             }
368         } else { // find best mode per block
369             for(x = 0; x < mb_count; x++) {
370                 mb = &s->mb[x];
371                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
372                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
373
374                 if(score1 <= score2) {
375                     ret += score1;
376 #ifdef CINEPAK_REPORT_SERR
377                     *serr += mb->v1_error;
378 #endif
379                     mb->best_encoding = ENC_V1;
380                 } else {
381                     ret += score2;
382 #ifdef CINEPAK_REPORT_SERR
383                     *serr += mb->v4_error;
384 #endif
385                     mb->best_encoding = ENC_V4;
386                 }
387             }
388         }
389
390         break;
391     case MODE_MC:
392         //1, 10 or 34 bits per MB
393         if(report) {
394             int v1_shrunk = 0, v4_shrunk = 0;
395             for(x = 0; x < mb_count; x++) {
396                 mb = &s->mb[x];
397 // it is OK to move blocks to ENC_SKIP here
398 // but not to any codebook encoding!
399                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
400                 if(mb->best_encoding == ENC_SKIP) {
401                     ret += score1;
402 #ifdef CINEPAK_REPORT_SERR
403                     *serr += mb->skip_error;
404 #endif
405                 } else if(mb->best_encoding == ENC_V1) {
406                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
407                         mb->best_encoding = ENC_SKIP;
408                         ++v1_shrunk;
409                         ret += score1;
410 #ifdef CINEPAK_REPORT_SERR
411                         *serr += mb->skip_error;
412 #endif
413                     } else {
414                         ret += score2;
415 #ifdef CINEPAK_REPORT_SERR
416                         *serr += mb->v1_error;
417 #endif
418                     }
419                 } else {
420                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
421                         mb->best_encoding = ENC_SKIP;
422                         ++v4_shrunk;
423                         ret += score1;
424 #ifdef CINEPAK_REPORT_SERR
425                         *serr += mb->skip_error;
426 #endif
427                     } else {
428                         ret += score3;
429 #ifdef CINEPAK_REPORT_SERR
430                         *serr += mb->v4_error;
431 #endif
432                     }
433                 }
434             }
435             *training_set_v1_shrunk = v1_shrunk;
436             *training_set_v4_shrunk = v4_shrunk;
437         } else { // find best mode per block
438             for(x = 0; x < mb_count; x++) {
439                 mb = &s->mb[x];
440                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
441                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
442                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
443
444                 if(score1 <= score2 && score1 <= score3) {
445                     ret += score1;
446 #ifdef CINEPAK_REPORT_SERR
447                     *serr += mb->skip_error;
448 #endif
449                     mb->best_encoding = ENC_SKIP;
450                 } else if(score2 <= score3) {
451                     ret += score2;
452 #ifdef CINEPAK_REPORT_SERR
453                     *serr += mb->v1_error;
454 #endif
455                     mb->best_encoding = ENC_V1;
456                 } else {
457                     ret += score3;
458 #ifdef CINEPAK_REPORT_SERR
459                     *serr += mb->v4_error;
460 #endif
461                     mb->best_encoding = ENC_V4;
462                 }
463             }
464         }
465
466         break;
467     }
468
469     return ret;
470 }
471
472 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
473 {
474     buf[0] = chunk_type;
475     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
476     return CHUNK_HEADER_SIZE;
477 }
478
479 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
480 {
481     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
482     int incremental_codebook_replacement_mode = 0; // hardcoded here,
483                 // the compiler should notice that this is a constant -- rl
484
485     ret = write_chunk_header(buf,
486           s->pix_fmt == AV_PIX_FMT_RGB24 ?
487            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
488            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
489           entry_size * size
490            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
491
492 // we do codebook encoding according to the "intra" mode
493 // but we keep the "dead" code for reference in case we will want
494 // to use incremental codebook updates (which actually would give us
495 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
496 // (of course, the code will be not useful as-is)
497     if(incremental_codebook_replacement_mode) {
498         int flags = 0;
499         int flagsind;
500         for(x = 0; x < size; x++) {
501             if(flags == 0) {
502                 flagsind = ret;
503                 ret += 4;
504                 flags = 0x80000000;
505             } else
506                 flags = ((flags>>1) | 0x80000000);
507             for(y = 0; y < entry_size; y++)
508                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
509             if((flags&0xffffffff) == 0xffffffff) {
510                 AV_WB32(&buf[flagsind], flags);
511                 flags = 0;
512             }
513         }
514         if(flags)
515             AV_WB32(&buf[flagsind], flags);
516     } else
517         for(x = 0; x < size; x++)
518             for(y = 0; y < entry_size; y++)
519                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
520
521     return ret;
522 }
523
524 //sets out to the sub picture starting at (x,y) in in
525 static void get_sub_picture(CinepakEncContext *s, int x, int y,
526                             uint8_t * in_data[4], int  in_linesize[4],
527                             uint8_t *out_data[4], int out_linesize[4])
528 {
529     out_data[0] = in_data[0] + x + y * in_linesize[0];
530     out_linesize[0] = in_linesize[0];
531
532     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
533         out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
534         out_linesize[1] = in_linesize[1];
535
536         out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
537         out_linesize[2] = in_linesize[2];
538     }
539 }
540
541 //decodes the V1 vector in mb into the 4x4 MB pointed to by data
542 static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
543                              int linesize[4], int v1_vector, strip_info *info)
544 {
545     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546
547     data[0][0] =
548             data[0][1] =
549             data[0][    linesize[0]] =
550             data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551
552     data[0][2] =
553             data[0][3] =
554             data[0][2+  linesize[0]] =
555             data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556
557     data[0][2*linesize[0]] =
558             data[0][1+2*linesize[0]] =
559             data[0][  3*linesize[0]] =
560             data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561
562     data[0][2+2*linesize[0]] =
563             data[0][3+2*linesize[0]] =
564             data[0][2+3*linesize[0]] =
565             data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566
567     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568         data[1][0] =
569             data[1][1] =
570             data[1][    linesize[1]] =
571             data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572
573         data[2][0] =
574             data[2][1] =
575             data[2][    linesize[2]] =
576             data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577     }
578 }
579
580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by data
581 static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
582                              int linesize[4], int *v4_vector, strip_info *info)
583 {
584     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
585
586     for(i = y = 0; y < 4; y += 2) {
587         for(x = 0; x < 4; x += 2, i++) {
588             data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
589             data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
590             data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
591             data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
592
593             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
594                 data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
595                 data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
596             }
597         }
598     }
599 }
600
601 static void copy_mb(CinepakEncContext *s,
602                     uint8_t *a_data[4], int a_linesize[4],
603                     uint8_t *b_data[4], int b_linesize[4])
604 {
605     int y, p;
606
607     for(y = 0; y < MB_SIZE; y++) {
608         memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
609                MB_SIZE);
610     }
611
612     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
613         for(p = 1; p <= 2; p++) {
614             for(y = 0; y < MB_SIZE/2; y++) {
615                 memcpy(a_data[p] + y*a_linesize[p],
616                        b_data[p] + y*b_linesize[p],
617                        MB_SIZE/2);
618             }
619         }
620     }
621 }
622
623 static int encode_mode(CinepakEncContext *s, int h,
624                        uint8_t *scratch_data[4], int scratch_linesize[4],
625                        uint8_t *last_data[4], int last_linesize[4],
626                        strip_info *info, unsigned char *buf)
627 {
628     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
629     int needs_extra_bit, should_write_temp;
630     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
631     mb_info *mb;
632     uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
633     int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
634
635     //encode codebooks
636 ////// MacOS vintage decoder compatibility dictates the presence of
637 ////// the codebook chunk even when the codebook is empty - pretty dumb...
638 ////// and also the certain order of the codebook chunks -- rl
639     if(info->v4_size || !s->skip_empty_cb)
640         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
641
642     if(info->v1_size || !s->skip_empty_cb)
643         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
644
645     //update scratch picture
646     for(z = y = 0; y < h; y += MB_SIZE) {
647         for(x = 0; x < s->w; x += MB_SIZE, z++) {
648             mb = &s->mb[z];
649
650             get_sub_picture(s, x, y, scratch_data, scratch_linesize,
651                             sub_scratch_data, sub_scratch_linesize);
652
653             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
654                 get_sub_picture(s, x, y,
655                                 last_data, last_linesize,
656                                 sub_last_data, sub_last_linesize);
657                 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
658                         sub_last_data, sub_last_linesize);
659             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
660                 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
661                                  mb->v1_vector, info);
662             else
663                 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
664                                  mb->v4_vector, info);
665         }
666     }
667
668     switch(info->mode) {
669     case MODE_V1_ONLY:
670         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
671         ret += write_chunk_header(buf + ret, 0x32, mb_count);
672
673         for(x = 0; x < mb_count; x++)
674             buf[ret++] = s->mb[x].v1_vector;
675
676         break;
677     case MODE_V1_V4:
678         //remember header position
679         header_ofs = ret;
680         ret += CHUNK_HEADER_SIZE;
681
682         for(x = 0; x < mb_count; x += 32) {
683             flags = 0;
684             for(y = x; y < FFMIN(x+32, mb_count); y++)
685                 if(s->mb[y].best_encoding == ENC_V4)
686                     flags |= 1 << (31 - y + x);
687
688             AV_WB32(&buf[ret], flags);
689             ret += 4;
690
691             for(y = x; y < FFMIN(x+32, mb_count); y++) {
692                 mb = &s->mb[y];
693
694                 if(mb->best_encoding == ENC_V1)
695                     buf[ret++] = mb->v1_vector;
696                 else
697                     for(z = 0; z < 4; z++)
698                         buf[ret++] = mb->v4_vector[z];
699             }
700         }
701
702         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
703
704         break;
705     case MODE_MC:
706         //remember header position
707         header_ofs = ret;
708         ret += CHUNK_HEADER_SIZE;
709         flags = bits = temp_size = 0;
710
711         for(x = 0; x < mb_count; x++) {
712             mb = &s->mb[x];
713             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
714             needs_extra_bit = 0;
715             should_write_temp = 0;
716
717             if(mb->best_encoding != ENC_SKIP) {
718                 if(bits < 32)
719                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
720                 else
721                     needs_extra_bit = 1;
722             }
723
724             if(bits == 32) {
725                 AV_WB32(&buf[ret], flags);
726                 ret += 4;
727                 flags = bits = 0;
728
729                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
730                     memcpy(&buf[ret], temp, temp_size);
731                     ret += temp_size;
732                     temp_size = 0;
733                 } else
734                     should_write_temp = 1;
735             }
736
737             if(needs_extra_bit) {
738                 flags = (mb->best_encoding == ENC_V4) << 31;
739                 bits = 1;
740             }
741
742             if(mb->best_encoding == ENC_V1)
743                 temp[temp_size++] = mb->v1_vector;
744             else if(mb->best_encoding == ENC_V4)
745                 for(z = 0; z < 4; z++)
746                     temp[temp_size++] = mb->v4_vector[z];
747
748             if(should_write_temp) {
749                 memcpy(&buf[ret], temp, temp_size);
750                 ret += temp_size;
751                 temp_size = 0;
752             }
753         }
754
755         if(bits > 0) {
756             AV_WB32(&buf[ret], flags);
757             ret += 4;
758             memcpy(&buf[ret], temp, temp_size);
759             ret += temp_size;
760         }
761
762         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
763
764         break;
765     }
766
767     return ret;
768 }
769
770 //computes distortion of 4x4 MB in b compared to a
771 static int compute_mb_distortion(CinepakEncContext *s,
772                                  uint8_t *a_data[4], int a_linesize[4],
773                                  uint8_t *b_data[4], int b_linesize[4])
774 {
775     int x, y, p, d, ret = 0;
776
777     for(y = 0; y < MB_SIZE; y++) {
778         for(x = 0; x < MB_SIZE; x++) {
779             d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
780             ret += d*d;
781         }
782     }
783
784     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
785         for(p = 1; p <= 2; p++) {
786             for(y = 0; y < MB_SIZE/2; y++) {
787                 for(x = 0; x < MB_SIZE/2; x++) {
788                     d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
789                     ret += d*d;
790                 }
791             }
792         }
793     }
794
795     return ret;
796 }
797
798 // return the possibly adjusted size of the codebook
799 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
800 static int quantize(CinepakEncContext *s, int h,
801                     uint8_t *data[4], int linesize[4],
802                     int v1mode, strip_info *info,
803                     mb_encoding encoding)
804 {
805     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
806     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
807     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
808     int size = v1mode ? info->v1_size : info->v4_size;
809     int64_t total_error = 0;
810     uint8_t vq_pict_buf[(MB_AREA*3)/2];
811     uint8_t *sub_data    [4], *vq_data    [4];
812     int      sub_linesize[4],  vq_linesize[4];
813
814     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
815         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
816             int *base;
817
818             if(CERTAIN(encoding)) {
819 // use for the training only the blocks known to be to be encoded [sic:-]
820                if(s->mb[mbn].best_encoding != encoding) continue;
821             }
822
823             base = s->codebook_input + i*entry_size;
824             if(v1mode) {
825                 //subsample
826                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
827                     for(x2 = 0; x2 < 4; x2 += 2, j++) {
828                         plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
829                         shift = y2 < 4 ? 0 : 1;
830                         x3 = shift ? 0 : x2;
831                         y3 = shift ? 0 : y2;
832                         base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
833                                    data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
834                                    data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
835                                    data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
836                     }
837                 }
838             } else {
839                 //copy
840                 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
841                     for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
842                         for(k = 0; k < entry_size; k++, j++) {
843                             plane = k >= 4 ? k - 3 : 0;
844
845                             if(k >= 4) {
846                                 x3 = (x+x2) >> 1;
847                                 y3 = (y+y2) >> 1;
848                             } else {
849                                 x3 = x + x2 + (k & 1);
850                                 y3 = y + y2 + (k >> 1);
851                             }
852
853                             base[j] = data[plane][x3 + y3*linesize[plane]];
854                         }
855                     }
856                 }
857             }
858             i += v1mode ? 1 : 4;
859         }
860     }
861 //    if(i < mbn*(v1mode ? 1 : 4)) {
862 //        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
863 //    }
864
865     if(i == 0) // empty training set, nothing to do
866         return 0;
867     if(i < size) {
868         //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
869         size = i;
870     }
871
872     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
873     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
874
875     //setup vq_data, which contains a single MB
876     vq_data[0] = vq_pict_buf;
877     vq_linesize[0] = MB_SIZE;
878     vq_data[1] = &vq_pict_buf[MB_AREA];
879     vq_data[2] = vq_data[1] + (MB_AREA >> 2);
880     vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
881
882     //copy indices
883     for(i = j = y = 0; y < h; y += MB_SIZE) {
884         for(x = 0; x < s->w; x += MB_SIZE, j++) {
885             mb_info *mb = &s->mb[j];
886 // skip uninteresting blocks if we know their preferred encoding
887             if(CERTAIN(encoding) && mb->best_encoding != encoding)
888                 continue;
889
890             //point sub_data to current MB
891             get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
892
893             if(v1mode) {
894                 mb->v1_vector = s->codebook_closest[i];
895
896                 //fill in vq_data with V1 data
897                 decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
898
899                 mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
900                                                      vq_data, vq_linesize);
901                 total_error += mb->v1_error;
902             } else {
903                 for(k = 0; k < 4; k++)
904                     mb->v4_vector[k] = s->codebook_closest[i+k];
905
906                 //fill in vq_data with V4 data
907                 decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
908
909                 mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
910                                                      vq_data, vq_linesize);
911                 total_error += mb->v4_error;
912             }
913             i += v1mode ? 1 : 4;
914         }
915     }
916 // check that we did it right in the beginning of the function
917     av_assert0(i >= size); // training set is no smaller than the codebook
918
919     //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
920
921     return size;
922 }
923
924 static void calculate_skip_errors(CinepakEncContext *s, int h,
925                                   uint8_t *last_data[4], int last_linesize[4],
926                                   uint8_t *data[4], int linesize[4],
927                                   strip_info *info)
928 {
929     int x, y, i;
930     uint8_t *sub_last_data    [4], *sub_pict_data    [4];
931     int      sub_last_linesize[4],  sub_pict_linesize[4];
932
933     for(i = y = 0; y < h; y += MB_SIZE) {
934         for(x = 0; x < s->w; x += MB_SIZE, i++) {
935             get_sub_picture(s, x, y, last_data,     last_linesize,
936                                  sub_last_data, sub_last_linesize);
937             get_sub_picture(s, x, y,      data,          linesize,
938                                  sub_pict_data, sub_pict_linesize);
939
940             s->mb[i].skip_error = compute_mb_distortion(s,
941                                             sub_last_data, sub_last_linesize,
942                                             sub_pict_data, sub_pict_linesize);
943         }
944     }
945 }
946
947 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
948 {
949 // actually we are exclusively using intra strip coding (how much can we win
950 // otherwise? how to choose which part of a codebook to update?),
951 // keyframes are different only because we disallow ENC_SKIP on them -- rl
952 // (besides, the logic here used to be inverted: )
953 //    buf[0] = keyframe ? 0x11: 0x10;
954     buf[0] = keyframe ? 0x10: 0x11;
955     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
956 //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
957     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
958     AV_WB16(&buf[6], 0);
959 //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
960     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
961     AV_WB16(&buf[10], s->w);
962     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
963 }
964
965 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
966                     uint8_t *last_data[4], int last_linesize[4],
967                     uint8_t *data[4], int linesize[4],
968                     uint8_t *scratch_data[4], int scratch_linesize[4],
969                     unsigned char *buf, int64_t *best_score
970 #ifdef CINEPAK_REPORT_SERR
971 , int64_t *best_serr
972 #endif
973 )
974 {
975     int64_t score = 0;
976 #ifdef CINEPAK_REPORT_SERR
977     int64_t serr;
978 #endif
979     int best_size = 0;
980     strip_info info;
981 // for codebook optimization:
982     int v1enough, v1_size, v4enough, v4_size;
983     int new_v1_size, new_v4_size;
984     int v1shrunk, v4shrunk;
985
986     if(!keyframe)
987         calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
988                               &info);
989
990     //try some powers of 4 for the size of the codebooks
991     //constraint the v4 codebook to be no bigger than v1 one,
992     //(and no less than v1_size/4)
993     //thus making v1 preferable and possibly losing small details? should be ok
994 #define SMALLEST_CODEBOOK 1
995     for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
996         for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
997             //try all modes
998             for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
999                 //don't allow MODE_MC in intra frames
1000                 if(keyframe && mode == MODE_MC)
1001                     continue;
1002
1003                 if(mode == MODE_V1_ONLY) {
1004                     info.v1_size = v1_size;
1005 // the size may shrink even before optimizations if the input is short:
1006                     info.v1_size = quantize(s, h, data, linesize, 1,
1007                                             &info, ENC_UNCERTAIN);
1008                     if(info.v1_size < v1_size)
1009 // too few eligible blocks, no sense in trying bigger sizes
1010                         v1enough = 1;
1011
1012                     info.v4_size = 0;
1013                 } else { // mode != MODE_V1_ONLY
1014                     // if v4 codebook is empty then only allow V1-only mode
1015                     if(!v4_size)
1016                         continue;
1017
1018                     if(mode == MODE_V1_V4) {
1019                         info.v4_size = v4_size;
1020                         info.v4_size = quantize(s, h, data, linesize, 0,
1021                                                 &info, ENC_UNCERTAIN);
1022                         if(info.v4_size < v4_size)
1023 // too few eligible blocks, no sense in trying bigger sizes
1024                             v4enough = 1;
1025                     }
1026                 }
1027
1028                 info.mode = mode;
1029 // choose the best encoding per block, based on current experience
1030                 score = calculate_mode_score(s, h, &info, 0,
1031                                              &v1shrunk, &v4shrunk
1032 #ifdef CINEPAK_REPORT_SERR
1033 , &serr
1034 #endif
1035 );
1036
1037                 if(mode != MODE_V1_ONLY){
1038                     int extra_iterations_limit = s->max_extra_cb_iterations;
1039 // recompute the codebooks, omitting the extra blocks
1040 // we assume we _may_ come here with more blocks to encode than before
1041                     info.v1_size = v1_size;
1042                     new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1043                     if(new_v1_size < info.v1_size){
1044                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1045                         info.v1_size = new_v1_size;
1046                     }
1047 // we assume we _may_ come here with more blocks to encode than before
1048                     info.v4_size = v4_size;
1049                     new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1050                     if(new_v4_size < info.v4_size) {
1051                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1052                         info.v4_size = new_v4_size;
1053                     }
1054 // calculate the resulting score
1055 // (do not move blocks to codebook encodings now, as some blocks may have
1056 // got bigger errors despite a smaller training set - but we do not
1057 // ever grow the training sets back)
1058                     for(;;) {
1059                         score = calculate_mode_score(s, h, &info, 1,
1060                                                      &v1shrunk, &v4shrunk
1061 #ifdef CINEPAK_REPORT_SERR
1062 , &serr
1063 #endif
1064 );
1065 // do we have a reason to reiterate? if so, have we reached the limit?
1066                         if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1067 // recompute the codebooks, omitting the extra blocks
1068                         if(v1shrunk) {
1069                             info.v1_size = v1_size;
1070                             new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1071                             if(new_v1_size < info.v1_size){
1072                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1073                                 info.v1_size = new_v1_size;
1074                             }
1075                         }
1076                         if(v4shrunk) {
1077                             info.v4_size = v4_size;
1078                             new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1079                             if(new_v4_size < info.v4_size) {
1080                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1081                                 info.v4_size = new_v4_size;
1082                             }
1083                         }
1084                     }
1085                 }
1086
1087                 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
1088
1089                 if(best_size == 0 || score < *best_score) {
1090
1091                     *best_score = score;
1092 #ifdef CINEPAK_REPORT_SERR
1093                     *best_serr = serr;
1094 #endif
1095                     best_size = encode_mode(s, h,
1096                                             scratch_data, scratch_linesize,
1097                                             last_data, last_linesize, &info,
1098                                             s->strip_buf + STRIP_HEADER_SIZE);
1099
1100                     //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
1101                     //av_log(s->avctx, AV_LOG_INFO, "\n");
1102 #ifdef CINEPAK_REPORT_SERR
1103                     av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
1104 #endif
1105
1106 #ifdef CINEPAKENC_DEBUG
1107                     //save MB encoding choices
1108                     memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1109 #endif
1110
1111                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1112                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1113
1114                 }
1115             }
1116         }
1117     }
1118
1119 #ifdef CINEPAKENC_DEBUG
1120     //gather stats. this will only work properly of MAX_STRIPS == 1
1121     if(best_info.mode == MODE_V1_ONLY) {
1122         s->num_v1_mode++;
1123         s->num_v1_encs += s->w*h/MB_AREA;
1124     } else {
1125         if(best_info.mode == MODE_V1_V4)
1126             s->num_v4_mode++;
1127         else
1128             s->num_mc_mode++;
1129
1130         int x;
1131         for(x = 0; x < s->w*h/MB_AREA; x++)
1132             if(s->best_mb[x].best_encoding == ENC_V1)
1133                 s->num_v1_encs++;
1134             else if(s->best_mb[x].best_encoding == ENC_V4)
1135                 s->num_v4_encs++;
1136             else
1137                 s->num_skips++;
1138     }
1139 #endif
1140
1141     best_size += STRIP_HEADER_SIZE;
1142     memcpy(buf, s->strip_buf, best_size);
1143
1144     return best_size;
1145 }
1146
1147 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1148 {
1149     buf[0] = isakeyframe ? 0 : 1;
1150     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1151     AV_WB16(&buf[4], s->w);
1152     AV_WB16(&buf[6], s->h);
1153     AV_WB16(&buf[8], num_strips);
1154
1155     return CVID_HEADER_SIZE;
1156 }
1157
1158 static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
1159                     int isakeyframe, unsigned char *buf, int buf_size)
1160 {
1161     int num_strips, strip, i, y, nexty, size, temp_size;
1162     uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
1163     int      last_linesize[4],  linesize[4],  scratch_linesize[4];
1164     int64_t best_score = 0, score, score_temp;
1165 #ifdef CINEPAK_REPORT_SERR
1166     int64_t best_serr = 0, serr, serr_temp;
1167 #endif
1168
1169     int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1170
1171     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1172         int x;
1173 // build a copy of the given frame in the correct colorspace
1174         for(y = 0; y < s->h; y += 2) {
1175             for(x = 0; x < s->w; x += 2) {
1176                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1177                 ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
1178                 ir[1] = ir[0] + frame->linesize[0];
1179                 get_sub_picture(s, x, y,
1180                                 s->input_frame->data, s->input_frame->linesize,
1181                                 scratch_data, scratch_linesize);
1182                 r = g = b = 0;
1183                 for(i=0; i<4; ++i) {
1184                     int i1, i2;
1185                     i1 = (i&1); i2 = (i>=2);
1186                     rr = ir[i2][i1*3+0];
1187                     gg = ir[i2][i1*3+1];
1188                     bb = ir[i2][i1*3+2];
1189                     r += rr; g += gg; b += bb;
1190 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1191 // "Y"
1192 //                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1193                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1194                     if(      rr <   0) rr =   0;
1195                     else if (rr > 255) rr = 255;
1196                     scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
1197                 }
1198 // let us scale down as late as possible
1199 //                r /= 4; g /= 4; b /= 4;
1200 // "U"
1201 //                rr = -0.1429*r - 0.2857*g + 0.4286*b;
1202                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1203                 if(      rr < -128) rr = -128;
1204                 else if (rr >  127) rr =  127;
1205                 scratch_data[1][0] = rr + 128; // quantize needs unsigned
1206 // "V"
1207 //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
1208                 rr = (748893*r - 599156*g - 149737*b) >> 23;
1209                 if(      rr < -128) rr = -128;
1210                 else if (rr >  127) rr =  127;
1211                 scratch_data[2][0] = rr + 128; // quantize needs unsigned
1212             }
1213         }
1214     }
1215
1216     //would be nice but quite certainly incompatible with vintage players:
1217     // support encoding zero strips (meaning skip the whole frame)
1218     for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1219         score = 0;
1220         size = 0;
1221 #ifdef CINEPAK_REPORT_SERR
1222         serr = 0;
1223 #endif
1224
1225         for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1226             int strip_height;
1227
1228             nexty = strip * s->h / num_strips; // <= s->h
1229             //make nexty the next multiple of 4 if not already there
1230             if(nexty & 3)
1231                 nexty += 4 - (nexty & 3);
1232
1233             strip_height = nexty - y;
1234             if(strip_height <= 0) { // can this ever happen?
1235                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1236                 continue;
1237             }
1238
1239             if(s->pix_fmt == AV_PIX_FMT_RGB24)
1240                 get_sub_picture(s, 0, y,
1241                                 s->input_frame->data, s->input_frame->linesize,
1242                                 data, linesize);
1243             else
1244                 get_sub_picture(s, 0, y,
1245                                 (uint8_t **)frame->data, (int*)frame->linesize,
1246                                 data, linesize);
1247             get_sub_picture(s, 0, y,
1248                             s->last_frame->data, s->last_frame->linesize,
1249                             last_data, last_linesize);
1250             get_sub_picture(s, 0, y,
1251                             s->scratch_frame->data, s->scratch_frame->linesize,
1252                             scratch_data, scratch_linesize);
1253
1254             if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
1255                                      last_data, last_linesize, data, linesize,
1256                                      scratch_data, scratch_linesize,
1257                                      s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1258 #ifdef CINEPAK_REPORT_SERR
1259 , &serr_temp
1260 #endif
1261 )) < 0)
1262                 return temp_size;
1263
1264             score += score_temp;
1265 #ifdef CINEPAK_REPORT_SERR
1266             serr += serr_temp;
1267 #endif
1268             size += temp_size;
1269             //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1270             //av_log(s->avctx, AV_LOG_INFO, "\n");
1271         }
1272
1273         if(best_score == 0 || score < best_score) {
1274             best_score = score;
1275 #ifdef CINEPAK_REPORT_SERR
1276             best_serr = serr;
1277 #endif
1278             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1279             //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
1280 #ifdef CINEPAK_REPORT_SERR
1281             av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
1282 #endif
1283
1284             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1285             memcpy(buf, s->frame_buf, best_size);
1286             best_nstrips = num_strips;
1287         }
1288 // avoid trying too many strip numbers without a real reason
1289 // (this makes the processing of the very first frame faster)
1290         if(num_strips - best_nstrips > 4)
1291             break;
1292     }
1293
1294     av_assert0(best_nstrips >= 0 && best_size >= 0);
1295
1296 // let the number of strips slowly adapt to the changes in the contents,
1297 // compared to full bruteforcing every time this will occasionally lead
1298 // to some r/d performance loss but makes encoding up to several times faster
1299     if(!s->strip_number_delta_range) {
1300         if(best_nstrips == s->max_strips) { // let us try to step up
1301             s->max_strips = best_nstrips + 1;
1302             if(s->max_strips >= s->max_max_strips)
1303                 s->max_strips = s->max_max_strips;
1304         } else { // try to step down
1305             s->max_strips = best_nstrips;
1306         }
1307         s->min_strips = s->max_strips - 1;
1308         if(s->min_strips < s->min_min_strips)
1309             s->min_strips = s->min_min_strips;
1310     } else {
1311         s->max_strips = best_nstrips + s->strip_number_delta_range;
1312         if(s->max_strips >= s->max_max_strips)
1313             s->max_strips = s->max_max_strips;
1314         s->min_strips = best_nstrips - s->strip_number_delta_range;
1315         if(s->min_strips < s->min_min_strips)
1316             s->min_strips = s->min_min_strips;
1317     }
1318
1319     return best_size;
1320 }
1321
1322 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1323                                 const AVFrame *frame, int *got_packet)
1324 {
1325     CinepakEncContext *s = avctx->priv_data;
1326     int ret;
1327
1328     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1329
1330     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1331         return ret;
1332     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1333     pkt->size = ret;
1334     if (s->curframe == 0)
1335         pkt->flags |= AV_PKT_FLAG_KEY;
1336     *got_packet = 1;
1337
1338     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1339
1340     if (++s->curframe >= s->keyint)
1341         s->curframe = 0;
1342
1343     return 0;
1344 }
1345
1346 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1347 {
1348     CinepakEncContext *s = avctx->priv_data;
1349     int x;
1350
1351     av_frame_free(&s->last_frame);
1352     av_frame_free(&s->best_frame);
1353     av_frame_free(&s->scratch_frame);
1354     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1355         av_frame_free(&s->input_frame);
1356     av_freep(&s->codebook_input);
1357     av_freep(&s->codebook_closest);
1358     av_freep(&s->strip_buf);
1359     av_freep(&s->frame_buf);
1360     av_freep(&s->mb);
1361 #ifdef CINEPAKENC_DEBUG
1362     av_freep(&s->best_mb);
1363 #endif
1364
1365     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1366         av_freep(&s->pict_bufs[x]);
1367
1368 #ifdef CINEPAKENC_DEBUG
1369     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1370         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1371 #endif
1372
1373     return 0;
1374 }
1375
1376 AVCodec ff_cinepak_encoder = {
1377     .name           = "cinepak",
1378     .type           = AVMEDIA_TYPE_VIDEO,
1379     .id             = AV_CODEC_ID_CINEPAK,
1380     .priv_data_size = sizeof(CinepakEncContext),
1381     .init           = cinepak_encode_init,
1382     .encode2        = cinepak_encode_frame,
1383     .close          = cinepak_encode_end,
1384     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1385     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak"),
1386     .priv_class     = &cinepak_class,
1387 };
1388