platform/external/ffmpeg.git - Unnamed repository; edit this file 'description' to name the repository.

1 /*
2  * Lagarith lossless decoder
3  * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * Lagarith lossless decoder
25  * @author Nathan Caldwell
26  */
27
28 #include <inttypes.h>
29
30 #include "avcodec.h"
31 #include "get_bits.h"
32 #include "mathops.h"
33 #include "lagarithrac.h"
34 #include "lossless_videodsp.h"
35 #include "thread.h"
36
/**
 * Frame type identifiers.
 *
 * lag_decode_frame() reads the value from the first byte of each packet.
 * FRAME_RAW, FRAME_OLD_ARITH_RGB and FRAME_REDUCED_RES are not handled by
 * its switch and end up in the "unsupported frame type" default case.
 */
enum LagarithFrameType {
    FRAME_RAW           = 1,    /**< uncompressed */
    FRAME_U_RGB24       = 2,    /**< unaligned RGB24 */
    FRAME_ARITH_YUY2    = 3,    /**< arithmetic coded YUY2 */
    FRAME_ARITH_RGB24   = 4,    /**< arithmetic coded RGB24 */
    FRAME_SOLID_GRAY    = 5,    /**< solid grayscale color frame */
    FRAME_SOLID_COLOR   = 6,    /**< solid non-grayscale color frame */
    FRAME_OLD_ARITH_RGB = 7,    /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
    FRAME_ARITH_RGBA    = 8,    /**< arithmetic coded RGBA */
    FRAME_SOLID_RGBA    = 9,    /**< solid RGBA color frame */
    FRAME_ARITH_YV12    = 10,   /**< arithmetic coded YV12 */
    FRAME_REDUCED_RES   = 11,   /**< reduced resolution YV12 frame */
};
50
/** Private decoder state, reached through AVCodecContext.priv_data. */
typedef struct LagarithContext {
    AVCodecContext *avctx;      /**< owning codec context, set by the init callbacks */
    LLVidDSPContext llviddsp;   /**< lossless video DSP routines used by the line predictors */
    int zeros;                  /**< number of consecutive zero bytes encountered */
    int zeros_rem;              /**< number of zero bytes remaining to output */
    /** Scratch buffer holding the decoded planes of arithmetic coded RGB(A) frames. */
    uint8_t *rgb_planes;
    /** Size bookkeeping for rgb_planes, maintained by av_fast_malloc(). */
    int      rgb_planes_allocated;
    /** Row stride of each plane inside rgb_planes (frame width aligned up to 16). */
    int rgb_stride;
} LagarithContext;
60
/**
 * Compute the 52-bit mantissa of 1/(double)denom.
 *
 * The reference codec drives its entropy coder with x86 double arithmetic,
 * so the rounding has to be reproduced bit-exactly with integers; native
 * floating point is not portable enough for that.
 *
 * @param denom denominator; must be non-zero (it is used as a divisor)
 * @return 52-bit mantissa
 * @see softfloat_mul
 */
static uint64_t softfloat_reciprocal(uint32_t denom)
{
    const uint64_t one      = 1ULL << 52;
    const int      shift    = av_log2(denom - 1) + 1;
    const uint64_t quotient = one / denom;
    /* Remainder of the integer division above. */
    const uint64_t residue  = one - quotient * denom;

    /* Normalise both parts, then divide the residue with round-to-nearest. */
    return (quotient << shift) + ((residue << shift) + denom / 2) / denom;
}
79
/**
 * (uint32_t)(x*f), where f has the given mantissa, and exponent 0
 * Used in combination with softfloat_reciprocal computes x/(double)denom.
 * @param x 32-bit integer factor
 * @param mantissa mantissa of f with exponent 0
 * @return 32-bit integer value (x*f)
 * @see softfloat_reciprocal
 */
static uint32_t softfloat_mul(uint32_t x, uint64_t mantissa)
{
    /* 32x32 -> 64 bit partial products of x with the low/high mantissa words. */
    uint64_t l = x * (mantissa & 0xffffffff);
    uint64_t h = x * (mantissa >> 32);
    h += l >> 32;
    l &= 0xffffffff;
    /* Rounding term. av_log2() can return 31 here, so the shifted constant
     * must be unsigned: 1 << 31 on a signed int is undefined behaviour. */
    l += 1U << av_log2(h >> 21);
    h += l >> 32;
    return h >> 20;
}
98
/**
 * Convert a signed escape byte into a zero-run length.
 *
 * Non-negative values map to 2*x and negative values to -2*x - 1, so
 * 0, -1, 1, -2, 2, ... become 0, 1, 2, 3, 4, ...
 *
 * @param x signed byte read after an escape sequence
 * @return run length in the range 0..255
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* Multiplication instead of "x << 1": left-shifting a negative value is
     * undefined behaviour. The complement replaces the XOR with the
     * sign-extension mask (x >> 7), which relied on an arithmetic shift. */
    const int doubled = x * 2;

    return x < 0 ? ~doubled : doubled;
}
103
104 static int lag_decode_prob(GetBitContext *gb, uint32_t *value)
105 {
106     static const uint8_t series[] = { 1, 2, 3, 5, 8, 13, 21 };
107     int i;
108     int bit     = 0;
109     int bits    = 0;
110     int prevbit = 0;
111     unsigned val;
112
113     for (i = 0; i < 7; i++) {
114         if (prevbit && bit)
115             break;
116         prevbit = bit;
117         bit = get_bits1(gb);
118         if (bit && !prevbit)
119             bits += series[i];
120     }
121     bits--;
122     if (bits < 0 || bits > 31) {
123         *value = 0;
124         return -1;
125     } else if (bits == 0) {
126         *value = 0;
127         return 0;
128     }
129
130     val  = get_bits_long(gb, bits);
131     val |= 1U << bits;
132
133     *value = val - 1;
134
135     return 0;
136 }
137
138 static int lag_read_prob_header(lag_rac *rac, GetBitContext *gb)
139 {
140     int i, j, scale_factor;
141     unsigned prob, cumulative_target;
142     unsigned cumul_prob = 0;
143     unsigned scaled_cumul_prob = 0;
144
145     rac->prob[0] = 0;
146     rac->prob[257] = UINT_MAX;
147     /* Read probabilities from bitstream */
148     for (i = 1; i < 257; i++) {
149         if (lag_decode_prob(gb, &rac->prob[i]) < 0) {
150             av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability encountered.\n");
151             return -1;
152         }
153         if ((uint64_t)cumul_prob + rac->prob[i] > UINT_MAX) {
154             av_log(rac->avctx, AV_LOG_ERROR, "Integer overflow encountered in cumulative probability calculation.\n");
155             return -1;
156         }
157         cumul_prob += rac->prob[i];
158         if (!rac->prob[i]) {
159             if (lag_decode_prob(gb, &prob)) {
160                 av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability run encountered.\n");
161                 return -1;
162             }
163             if (prob > 256 - i)
164                 prob = 256 - i;
165             for (j = 0; j < prob; j++)
166                 rac->prob[++i] = 0;
167         }
168     }
169
170     if (!cumul_prob) {
171         av_log(rac->avctx, AV_LOG_ERROR, "All probabilities are 0!\n");
172         return -1;
173     }
174
175     /* Scale probabilities so cumulative probability is an even power of 2. */
176     scale_factor = av_log2(cumul_prob);
177
178     if (cumul_prob & (cumul_prob - 1)) {
179         uint64_t mul = softfloat_reciprocal(cumul_prob);
180         for (i = 1; i <= 128; i++) {
181             rac->prob[i] = softfloat_mul(rac->prob[i], mul);
182             scaled_cumul_prob += rac->prob[i];
183         }
184         if (scaled_cumul_prob <= 0) {
185             av_log(rac->avctx, AV_LOG_ERROR, "Scaled probabilities invalid\n");
186             return AVERROR_INVALIDDATA;
187         }
188         for (; i < 257; i++) {
189             rac->prob[i] = softfloat_mul(rac->prob[i], mul);
190             scaled_cumul_prob += rac->prob[i];
191         }
192
193         scale_factor++;
194         cumulative_target = 1 << scale_factor;
195
196         if (scaled_cumul_prob > cumulative_target) {
197             av_log(rac->avctx, AV_LOG_ERROR,
198                    "Scaled probabilities are larger than target!\n");
199             return -1;
200         }
201
202         scaled_cumul_prob = cumulative_target - scaled_cumul_prob;
203
204         for (i = 1; scaled_cumul_prob; i = (i & 0x7f) + 1) {
205             if (rac->prob[i]) {
206                 rac->prob[i]++;
207                 scaled_cumul_prob--;
208             }
209             /* Comment from reference source:
210              * if (b & 0x80 == 0) {     // order of operations is 'wrong'; it has been left this way
211              *                          // since the compression change is negligible and fixing it
212              *                          // breaks backwards compatibility
213              *      b =- (signed int)b;
214              *      b &= 0xFF;
215              * } else {
216              *      b++;
217              *      b &= 0x7f;
218              * }
219              */
220         }
221     }
222
223     rac->scale = scale_factor;
224
225     /* Fill probability array with cumulative probability for each symbol. */
226     for (i = 1; i < 257; i++)
227         rac->prob[i] += rac->prob[i - 1];
228
229     return 0;
230 }
231
/**
 * Undo median prediction for one line.
 *
 * Close to add_hfyu_median_pred from huffyuvdsp.h, except that the gradient
 * predictor is deliberately NOT masked with 0xFF: masking yields incorrect
 * output for lagarith.
 *
 * @param dst      output line
 * @param src1     line above the one being reconstructed
 * @param diff     residuals for the current line
 * @param w        number of samples
 * @param left     in: sample left of the first one; out: last output sample
 * @param left_top in: sample above-left of the first one; out: last sample of src1
 */
static void add_lag_median_prediction(uint8_t *dst, uint8_t *src1,
                                      uint8_t *diff, int w, int *left,
                                      int *left_top)
{
    uint8_t cur        = *left;
    uint8_t above_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t above    = src1[x];
        const int     gradient = cur + above - above_left;

        cur        = mid_pred(cur, above, gradient) + diff[x];
        above_left = above;
        dst[x]     = cur;
    }

    *left     = cur;
    *left_top = above_left;
}
255
256 static void lag_pred_line(LagarithContext *l, uint8_t *buf,
257                           int width, int stride, int line)
258 {
259     int L, TL;
260
261     if (!line) {
262         /* Left prediction only for first line */
263         L = l->llviddsp.add_left_pred(buf, buf, width, 0);
264     } else {
265         /* Left pixel is actually prev_row[width] */
266         L = buf[width - stride - 1];
267
268         if (line == 1) {
269             /* Second line, left predict first pixel, the rest of the line is median predicted
270              * NOTE: In the case of RGB this pixel is top predicted */
271             TL = l->avctx->pix_fmt == AV_PIX_FMT_YUV420P ? buf[-stride] : L;
272         } else {
273             /* Top left is 2 rows back, last pixel */
274             TL = buf[width - (2 * stride) - 1];
275         }
276
277         add_lag_median_prediction(buf, buf - stride, buf,
278                                   width, &L, &TL);
279     }
280 }
281
282 static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf,
283                                int width, int stride, int line,
284                                int is_luma)
285 {
286     int L, TL;
287
288     if (!line) {
289         L= buf[0];
290         if (is_luma)
291             buf[0] = 0;
292         l->llviddsp.add_left_pred(buf, buf, width, 0);
293         if (is_luma)
294             buf[0] = L;
295         return;
296     }
297     if (line == 1) {
298         const int HEAD = is_luma ? 4 : 2;
299         int i;
300
301         L  = buf[width - stride - 1];
302         TL = buf[HEAD  - stride - 1];
303         for (i = 0; i < HEAD; i++) {
304             L += buf[i];
305             buf[i] = L;
306         }
307         for (; i < width; i++) {
308             L      = mid_pred(L & 0xFF, buf[i - stride], (L + buf[i - stride] - TL) & 0xFF) + buf[i];
309             TL     = buf[i - stride];
310             buf[i] = L;
311         }
312     } else {
313         TL = buf[width - (2 * stride) - 1];
314         L  = buf[width - stride - 1];
315         l->llviddsp.add_median_pred(buf, buf - stride, buf, width, &L, &TL);
316     }
317 }
318
319 static int lag_decode_line(LagarithContext *l, lag_rac *rac,
320                            uint8_t *dst, int width, int stride,
321                            int esc_count)
322 {
323     int i = 0;
324     int ret = 0;
325
326     if (!esc_count)
327         esc_count = -1;
328
329     /* Output any zeros remaining from the previous run */
330 handle_zeros:
331     if (l->zeros_rem) {
332         int count = FFMIN(l->zeros_rem, width - i);
333         memset(dst + i, 0, count);
334         i += count;
335         l->zeros_rem -= count;
336     }
337
338     while (i < width) {
339         dst[i] = lag_get_rac(rac);
340         ret++;
341
342         if (dst[i])
343             l->zeros = 0;
344         else
345             l->zeros++;
346
347         i++;
348         if (l->zeros == esc_count) {
349             int index = lag_get_rac(rac);
350             ret++;
351
352             l->zeros = 0;
353
354             l->zeros_rem = lag_calc_zero_run(index);
355             goto handle_zeros;
356         }
357     }
358     return ret;
359 }
360
/**
 * Decode one line of a plane that is stored with zero-run coding only
 * (no range coding).
 *
 * @param l         decoder context; l->zeros_rem carries a pending zero run
 *                  into and out of this call
 * @param dst       start of the output line
 * @param src       current read position in the input
 * @param src_end   end of the input buffer
 * @param width     line width in bytes
 * @param esc_count escape length; the caller in this file passes 1..3
 * @return number of input bytes consumed, or AVERROR_INVALIDDATA
 */
static int lag_decode_zero_run_line(LagarithContext *l, uint8_t *dst,
                                    const uint8_t *src, const uint8_t *src_end,
                                    int width, int esc_count)
{
    int i = 0;
    int count;
    uint8_t zero_run = 0;
    const uint8_t *src_start = src;
    /* 0xFF when the comparison holds, 0x00 otherwise. */
    uint8_t mask1 = -(esc_count < 2);
    uint8_t mask2 = -(esc_count < 3);
    /* The decode loop stops two bytes short of the line width. */
    uint8_t *end = dst + (width - 2);

    /* This code path is flagged as needing a sample file. */
    avpriv_request_sample(l->avctx, "zero_run_line");

    memset(dst, 0, width);

output_zeros:
    if (l->zeros_rem) {
        /* NOTE(review): i is not a position within the line here (it is 0 on
         * entry and a copy length after the goto below) — confirm that
         * "width - i" is the intended bound. */
        count = FFMIN(l->zeros_rem, width - i);
        if (end - dst < count) {
            av_log(l->avctx, AV_LOG_ERROR, "Too many zeros remaining.\n");
            return AVERROR_INVALIDDATA;
        }

        memset(dst, 0, count);
        l->zeros_rem -= count;
        dst += count;
    }

    while (dst < end) {
        i = 0;
        /* Scan forward until an escape pattern is found or the line ends. */
        while (!zero_run && dst + i < end) {
            i++;
            /* Ensure src[i + 2] below stays inside the input. */
            if (i+2 >= src_end - src)
                return AVERROR_INVALIDDATA;
            /* NOTE(review): with these masks a smaller esc_count requires
             * MORE zero bytes to match — verify against the reference
             * decoder. */
            zero_run =
                !(src[i] | (src[i + 1] & mask1) | (src[i + 2] & mask2));
        }
        if (zero_run) {
            zero_run = 0;
            i += esc_count;
            /* NOTE(review): after adding esc_count neither dst + i nor
             * src[i] is re-checked against end / src_end — confirm this
             * cannot overrun. */
            memcpy(dst, src, i);
            dst += i;
            /* The byte following the escape holds the run length. */
            l->zeros_rem = lag_calc_zero_run(src[i]);

            src += i + 1;
            goto output_zeros;
        } else {
            memcpy(dst, src, i);
            src += i;
            dst += i;
        }
    }
    return  src - src_start;
}
416
417
418
419 static int lag_decode_arith_plane(LagarithContext *l, uint8_t *dst,
420                                   int width, int height, int stride,
421                                   const uint8_t *src, int src_size)
422 {
423     int i = 0;
424     int read = 0;
425     uint32_t length;
426     uint32_t offset = 1;
427     int esc_count;
428     GetBitContext gb;
429     lag_rac rac;
430     const uint8_t *src_end = src + src_size;
431     int ret;
432
433     rac.avctx = l->avctx;
434     l->zeros = 0;
435
436     if(src_size < 2)
437         return AVERROR_INVALIDDATA;
438
439     esc_count = src[0];
440     if (esc_count < 4) {
441         length = width * height;
442         if(src_size < 5)
443             return AVERROR_INVALIDDATA;
444         if (esc_count && AV_RL32(src + 1) < length) {
445             length = AV_RL32(src + 1);
446             offset += 4;
447         }
448
449         if ((ret = init_get_bits8(&gb, src + offset, src_size - offset)) < 0)
450             return ret;
451
452         if (lag_read_prob_header(&rac, &gb) < 0)
453             return -1;
454
455         ff_lag_rac_init(&rac, &gb, length - stride);
456
457         for (i = 0; i < height; i++)
458             read += lag_decode_line(l, &rac, dst + (i * stride), width,
459                                     stride, esc_count);
460
461         if (read > length)
462             av_log(l->avctx, AV_LOG_WARNING,
463                    "Output more bytes than length (%d of %"PRIu32")\n", read,
464                    length);
465     } else if (esc_count < 8) {
466         esc_count -= 4;
467         src ++;
468         src_size --;
469         if (esc_count > 0) {
470             /* Zero run coding only, no range coding. */
471             for (i = 0; i < height; i++) {
472                 int res = lag_decode_zero_run_line(l, dst + (i * stride), src,
473                                                    src_end, width, esc_count);
474                 if (res < 0)
475                     return res;
476                 src += res;
477             }
478         } else {
479             if (src_size < width * height)
480                 return AVERROR_INVALIDDATA; // buffer not big enough
481             /* Plane is stored uncompressed */
482             for (i = 0; i < height; i++) {
483                 memcpy(dst + (i * stride), src, width);
484                 src += width;
485             }
486         }
487     } else if (esc_count == 0xff) {
488         /* Plane is a solid run of given value */
489         for (i = 0; i < height; i++)
490             memset(dst + i * stride, src[1], width);
491         /* Do not apply prediction.
492            Note: memset to 0 above, setting first value to src[1]
493            and applying prediction gives the same result. */
494         return 0;
495     } else {
496         av_log(l->avctx, AV_LOG_ERROR,
497                "Invalid zero run escape code! (%#x)\n", esc_count);
498         return -1;
499     }
500
501     if (l->avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
502         for (i = 0; i < height; i++) {
503             lag_pred_line(l, dst, width, stride, i);
504             dst += stride;
505         }
506     } else {
507         for (i = 0; i < height; i++) {
508             lag_pred_line_yuy2(l, dst, width, stride, i,
509                                width == l->avctx->width);
510             dst += stride;
511         }
512     }
513
514     return 0;
515 }
516
517 /**
518  * Decode a frame.
519  * @param avctx codec context
520  * @param data output AVFrame
521  * @param data_size size of output data or 0 if no picture is returned
522  * @param avpkt input packet
523  * @return number of consumed bytes on success or negative if decode fails
524  */
525 static int lag_decode_frame(AVCodecContext *avctx,
526                             void *data, int *got_frame, AVPacket *avpkt)
527 {
528     const uint8_t *buf = avpkt->data;
529     unsigned int buf_size = avpkt->size;
530     LagarithContext *l = avctx->priv_data;
531     ThreadFrame frame = { .f = data };
532     AVFrame *const p  = data;
533     uint8_t frametype = 0;
534     uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
535     uint32_t offs[4];
536     uint8_t *srcs[4], *dst;
537     int i, j, planes = 3;
538     int ret;
539
540     p->key_frame = 1;
541
542     frametype = buf[0];
543
544     offset_gu = AV_RL32(buf + 1);
545     offset_bv = AV_RL32(buf + 5);
546
547     switch (frametype) {
548     case FRAME_SOLID_RGBA:
549         avctx->pix_fmt = AV_PIX_FMT_RGB32;
550     case FRAME_SOLID_GRAY:
551         if (frametype == FRAME_SOLID_GRAY)
552             if (avctx->bits_per_coded_sample == 24) {
553                 avctx->pix_fmt = AV_PIX_FMT_RGB24;
554             } else {
555                 avctx->pix_fmt = AV_PIX_FMT_0RGB32;
556                 planes = 4;
557             }
558
559         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
560             return ret;
561
562         dst = p->data[0];
563         if (frametype == FRAME_SOLID_RGBA) {
564         for (j = 0; j < avctx->height; j++) {
565             for (i = 0; i < avctx->width; i++)
566                 AV_WN32(dst + i * 4, offset_gu);
567             dst += p->linesize[0];
568         }
569         } else {
570             for (j = 0; j < avctx->height; j++) {
571                 memset(dst, buf[1], avctx->width * planes);
572                 dst += p->linesize[0];
573             }
574         }
575         break;
576     case FRAME_SOLID_COLOR:
577         if (avctx->bits_per_coded_sample == 24) {
578             avctx->pix_fmt = AV_PIX_FMT_RGB24;
579         } else {
580             avctx->pix_fmt = AV_PIX_FMT_RGB32;
581             offset_gu |= 0xFFU << 24;
582         }
583
584         if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
585             return ret;
586
587         dst = p->data[0];
588         for (j = 0; j < avctx->height; j++) {
589             for (i = 0; i < avctx->width; i++)
590                 if (avctx->bits_per_coded_sample == 24) {
591                     AV_WB24(dst + i * 3, offset_gu);
592                 } else {
593                     AV_WN32(dst + i * 4, offset_gu);
594                 }
595             dst += p->linesize[0];
596         }
597         break;
598     case FRAME_ARITH_RGBA:
599         avctx->pix_fmt = AV_PIX_FMT_RGB32;
600         planes = 4;
601         offset_ry += 4;
602         offs[3] = AV_RL32(buf + 9);
603     case FRAME_ARITH_RGB24:
604     case FRAME_U_RGB24:
605         if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
606             avctx->pix_fmt = AV_PIX_FMT_RGB24;
607
608         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
609             return ret;
610
611         offs[0] = offset_bv;
612         offs[1] = offset_gu;
613         offs[2] = offset_ry;
614
615         l->rgb_stride = FFALIGN(avctx->width, 16);
616         av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
617                        l->rgb_stride * avctx->height * planes + 1);
618         if (!l->rgb_planes) {
619             av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
620             return AVERROR(ENOMEM);
621         }
622         for (i = 0; i < planes; i++)
623             srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
624         for (i = 0; i < planes; i++)
625             if (buf_size <= offs[i]) {
626                 av_log(avctx, AV_LOG_ERROR,
627                         "Invalid frame offsets\n");
628                 return AVERROR_INVALIDDATA;
629             }
630
631         for (i = 0; i < planes; i++)
632             lag_decode_arith_plane(l, srcs[i],
633                                    avctx->width, avctx->height,
634                                    -l->rgb_stride, buf + offs[i],
635                                    buf_size - offs[i]);
636         dst = p->data[0];
637         for (i = 0; i < planes; i++)
638             srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
639         for (j = 0; j < avctx->height; j++) {
640             for (i = 0; i < avctx->width; i++) {
641                 uint8_t r, g, b, a;
642                 r = srcs[0][i];
643                 g = srcs[1][i];
644                 b = srcs[2][i];
645                 r += g;
646                 b += g;
647                 if (frametype == FRAME_ARITH_RGBA) {
648                     a = srcs[3][i];
649                     AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
650                 } else {
651                     dst[i * 3 + 0] = r;
652                     dst[i * 3 + 1] = g;
653                     dst[i * 3 + 2] = b;
654                 }
655             }
656             dst += p->linesize[0];
657             for (i = 0; i < planes; i++)
658                 srcs[i] += l->rgb_stride;
659         }
660         break;
661     case FRAME_ARITH_YUY2:
662         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
663
664         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
665             return ret;
666
667         if (offset_ry >= buf_size ||
668             offset_gu >= buf_size ||
669             offset_bv >= buf_size) {
670             av_log(avctx, AV_LOG_ERROR,
671                    "Invalid frame offsets\n");
672             return AVERROR_INVALIDDATA;
673         }
674
675         lag_decode_arith_plane(l, p->data[0], avctx->width, avctx->height,
676                                p->linesize[0], buf + offset_ry,
677                                buf_size - offset_ry);
678         lag_decode_arith_plane(l, p->data[1], (avctx->width + 1) / 2,
679                                avctx->height, p->linesize[1],
680                                buf + offset_gu, buf_size - offset_gu);
681         lag_decode_arith_plane(l, p->data[2], (avctx->width + 1) / 2,
682                                avctx->height, p->linesize[2],
683                                buf + offset_bv, buf_size - offset_bv);
684         break;
685     case FRAME_ARITH_YV12:
686         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
687
688         if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
689             return ret;
690         if (buf_size <= offset_ry || buf_size <= offset_gu || buf_size <= offset_bv) {
691             return AVERROR_INVALIDDATA;
692         }
693
694         if (offset_ry >= buf_size ||
695             offset_gu >= buf_size ||
696             offset_bv >= buf_size) {
697             av_log(avctx, AV_LOG_ERROR,
698                    "Invalid frame offsets\n");
699             return AVERROR_INVALIDDATA;
700         }
701
702         lag_decode_arith_plane(l, p->data[0], avctx->width, avctx->height,
703                                p->linesize[0], buf + offset_ry,
704                                buf_size - offset_ry);
705         lag_decode_arith_plane(l, p->data[2], (avctx->width + 1) / 2,
706                                (avctx->height + 1) / 2, p->linesize[2],
707                                buf + offset_gu, buf_size - offset_gu);
708         lag_decode_arith_plane(l, p->data[1], (avctx->width + 1) / 2,
709                                (avctx->height + 1) / 2, p->linesize[1],
710                                buf + offset_bv, buf_size - offset_bv);
711         break;
712     default:
713         av_log(avctx, AV_LOG_ERROR,
714                "Unsupported Lagarith frame type: %#"PRIx8"\n", frametype);
715         return AVERROR_PATCHWELCOME;
716     }
717
718     *got_frame = 1;
719
720     return buf_size;
721 }
722
723 static av_cold int lag_decode_init(AVCodecContext *avctx)
724 {
725     LagarithContext *l = avctx->priv_data;
726     l->avctx = avctx;
727
728     ff_llviddsp_init(&l->llviddsp);
729
730     return 0;
731 }
732
#if HAVE_THREADS
/**
 * Per-thread initialisation: point the private context at the codec
 * context it belongs to.
 *
 * @param avctx codec context of the thread copy
 * @return always 0
 */
static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
{
    LagarithContext *const ctx = avctx->priv_data;

    ctx->avctx = avctx;
    return 0;
}
#endif
742
743 static av_cold int lag_decode_end(AVCodecContext *avctx)
744 {
745     LagarithContext *l = avctx->priv_data;
746
747     av_freep(&l->rgb_planes);
748
749     return 0;
750 }
751
752 AVCodec ff_lagarith_decoder = {
753     .name           = "lagarith",
754     .long_name      = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
755     .type           = AVMEDIA_TYPE_VIDEO,
756     .id             = AV_CODEC_ID_LAGARITH,
757     .priv_data_size = sizeof(LagarithContext),
758     .init           = lag_decode_init,
759     .init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
760     .close          = lag_decode_end,
761     .decode         = lag_decode_frame,
762     .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
763 };
764
1	/*
2	* Lagarith lossless decoder
3	* Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
4	*
5	* This file is part of FFmpeg.
6	*
7	* FFmpeg is free software; you can redistribute it and/or
8	* modify it under the terms of the GNU Lesser General Public
9	* License as published by the Free Software Foundation; either
10	* version 2.1 of the License, or (at your option) any later version.
11	*
12	* FFmpeg is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	* Lesser General Public License for more details.
16	*
17	* You should have received a copy of the GNU Lesser General Public
18	* License along with FFmpeg; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20	*/
21
22	/**
23	* @file
24	* Lagarith lossless decoder
25	* @author Nathan Caldwell
26	*/
27
28	#include <inttypes.h>
29
30	#include "avcodec.h"
31	#include "get_bits.h"
32	#include "mathops.h"
33	#include "lagarithrac.h"
34	#include "lossless_videodsp.h"
35	#include "thread.h"
36
/** Frame type identifiers. */
enum LagarithFrameType {
    FRAME_RAW           = 1,    /**< uncompressed */
    FRAME_U_RGB24       = 2,    /**< unaligned RGB24 */
    FRAME_ARITH_YUY2    = 3,    /**< arithmetic coded YUY2 */
    FRAME_ARITH_RGB24   = 4,    /**< arithmetic coded RGB24 */
    FRAME_SOLID_GRAY    = 5,    /**< solid grayscale color frame */
    FRAME_SOLID_COLOR   = 6,    /**< solid non-grayscale color frame */
    FRAME_OLD_ARITH_RGB = 7,    /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
    FRAME_ARITH_RGBA    = 8,    /**< arithmetic coded RGBA */
    FRAME_SOLID_RGBA    = 9,    /**< solid RGBA color frame */
    FRAME_ARITH_YV12    = 10,   /**< arithmetic coded YV12 */
    FRAME_REDUCED_RES   = 11,   /**< reduced resolution YV12 frame */
};
50
51	typedef struct LagarithContext {
52	AVCodecContext *avctx;
53	LLVidDSPContext llviddsp;
54	int zeros; /*< number of consecutive zero bytes encountered /
55	int zeros_rem; /*< number of zero bytes remaining to output /
56	uint8_t *rgb_planes;
57	int rgb_planes_allocated;
58	int rgb_stride;
59	} LagarithContext;
60
/**
 * Compute the 52-bit mantissa of 1/(double)denom.
 *
 * The reference codec drives its entropy coder with x86 double arithmetic,
 * so the rounding has to be reproduced bit-exactly with integers; native
 * floating point is not portable enough for that.
 *
 * @param denom denominator; must be non-zero (it is used as a divisor)
 * @return 52-bit mantissa
 * @see softfloat_mul
 */
static uint64_t softfloat_reciprocal(uint32_t denom)
{
    const uint64_t one      = 1ULL << 52;
    const int      shift    = av_log2(denom - 1) + 1;
    const uint64_t quotient = one / denom;
    /* Remainder of the integer division above. */
    const uint64_t residue  = one - quotient * denom;

    /* Normalise both parts, then divide the residue with round-to-nearest. */
    return (quotient << shift) + ((residue << shift) + denom / 2) / denom;
}
79
/**
 * (uint32_t)(x*f), where f has the given mantissa, and exponent 0
 * Used in combination with softfloat_reciprocal computes x/(double)denom.
 * @param x 32-bit integer factor
 * @param mantissa mantissa of f with exponent 0
 * @return 32-bit integer value (x*f)
 * @see softfloat_reciprocal
 */
static uint32_t softfloat_mul(uint32_t x, uint64_t mantissa)
{
    /* 32x32 -> 64 bit partial products of x with the low/high mantissa words. */
    uint64_t l = x * (mantissa & 0xffffffff);
    uint64_t h = x * (mantissa >> 32);
    h += l >> 32;
    l &= 0xffffffff;
    /* Rounding term. av_log2() can return 31 here, so the shifted constant
     * must be unsigned: 1 << 31 on a signed int is undefined behaviour. */
    l += 1U << av_log2(h >> 21);
    h += l >> 32;
    return h >> 20;
}
98
/**
 * Convert a signed escape byte into a zero-run length.
 *
 * Non-negative values map to 2*x and negative values to -2*x - 1, so
 * 0, -1, 1, -2, 2, ... become 0, 1, 2, 3, 4, ...
 *
 * @param x signed byte read after an escape sequence
 * @return run length in the range 0..255
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* Multiplication instead of "x << 1": left-shifting a negative value is
     * undefined behaviour. The complement replaces the XOR with the
     * sign-extension mask (x >> 7), which relied on an arithmetic shift. */
    const int doubled = x * 2;

    return x < 0 ? ~doubled : doubled;
}
103
104	static int lag_decode_prob(GetBitContext gb, uint32_t value)
105	{
106	static const uint8_t series[] = { 1, 2, 3, 5, 8, 13, 21 };
107	int i;
108	int bit = 0;
109	int bits = 0;
110	int prevbit = 0;
111	unsigned val;
112
113	for (i = 0; i < 7; i++) {
114	if (prevbit && bit)
115	break;
116	prevbit = bit;
117	bit = get_bits1(gb);
118	if (bit && !prevbit)
119	bits += series[i];
120	}
121	bits--;
122	if (bits < 0 \|\| bits > 31) {
123	*value = 0;
124	return -1;
125	} else if (bits == 0) {
126	*value = 0;
127	return 0;
128	}
129
130	val = get_bits_long(gb, bits);
131	val \|= 1U << bits;
132
133	*value = val - 1;
134
135	return 0;
136	}
137
138	static int lag_read_prob_header(lag_rac rac, GetBitContext gb)
139	{
140	int i, j, scale_factor;
141	unsigned prob, cumulative_target;
142	unsigned cumul_prob = 0;
143	unsigned scaled_cumul_prob = 0;
144
145	rac->prob[0] = 0;
146	rac->prob[257] = UINT_MAX;
147	/* Read probabilities from bitstream */
148	for (i = 1; i < 257; i++) {
149	if (lag_decode_prob(gb, &rac->prob[i]) < 0) {
150	av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability encountered.\n");
151	return -1;
152	}
153	if ((uint64_t)cumul_prob + rac->prob[i] > UINT_MAX) {
154	av_log(rac->avctx, AV_LOG_ERROR, "Integer overflow encountered in cumulative probability calculation.\n");
155	return -1;
156	}
157	cumul_prob += rac->prob[i];
158	if (!rac->prob[i]) {
159	if (lag_decode_prob(gb, &prob)) {
160	av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability run encountered.\n");
161	return -1;
162	}
163	if (prob > 256 - i)
164	prob = 256 - i;
165	for (j = 0; j < prob; j++)
166	rac->prob[++i] = 0;
167	}
168	}
169
170	if (!cumul_prob) {
171	av_log(rac->avctx, AV_LOG_ERROR, "All probabilities are 0!\n");
172	return -1;
173	}
174
175	/* Scale probabilities so cumulative probability is an even power of 2. */
176	scale_factor = av_log2(cumul_prob);
177
178	if (cumul_prob & (cumul_prob - 1)) {
179	uint64_t mul = softfloat_reciprocal(cumul_prob);
180	for (i = 1; i <= 128; i++) {
181	rac->prob[i] = softfloat_mul(rac->prob[i], mul);
182	scaled_cumul_prob += rac->prob[i];
183	}
184	if (scaled_cumul_prob <= 0) {
185	av_log(rac->avctx, AV_LOG_ERROR, "Scaled probabilities invalid\n");
186	return AVERROR_INVALIDDATA;
187	}
188	for (; i < 257; i++) {
189	rac->prob[i] = softfloat_mul(rac->prob[i], mul);
190	scaled_cumul_prob += rac->prob[i];
191	}
192
193	scale_factor++;
194	cumulative_target = 1 << scale_factor;
195
196	if (scaled_cumul_prob > cumulative_target) {
197	av_log(rac->avctx, AV_LOG_ERROR,
198	"Scaled probabilities are larger than target!\n");
199	return -1;
200	}
201
202	scaled_cumul_prob = cumulative_target - scaled_cumul_prob;
203
204	for (i = 1; scaled_cumul_prob; i = (i & 0x7f) + 1) {
205	if (rac->prob[i]) {
206	rac->prob[i]++;
207	scaled_cumul_prob--;
208	}
209	/* Comment from reference source:
210	* if (b & 0x80 == 0) { // order of operations is 'wrong'; it has been left this way
211	* // since the compression change is negligible and fixing it
212	* // breaks backwards compatibility
213	* b =- (signed int)b;
214	* b &= 0xFF;
215	* } else {
216	* b++;
217	* b &= 0x7f;
218	* }
219	*/
220	}
221	}
222
223	rac->scale = scale_factor;
224
225	/* Fill probability array with cumulative probability for each symbol. */
226	for (i = 1; i < 257; i++)
227	rac->prob[i] += rac->prob[i - 1];
228
229	return 0;
230	}
231
/**
 * Undo median prediction for one line of residuals.
 *
 * For every sample the predictor is the median (mid_pred) of the left
 * sample, the sample above (src1[i]) and left + above - top-left; the
 * residual diff[i] is added and the sum is stored truncated to 8 bits.
 *
 * @param dst      output line (may alias diff, as the caller in this file does)
 * @param src1     line above the one being reconstructed
 * @param diff     residuals for the current line
 * @param w        number of samples to process
 * @param left     in: sample to the left of dst[0]; out: last reconstructed sample
 * @param left_top in: sample above-left of dst[0]; out: last sample read from src1
 */
static void add_lag_median_prediction(uint8_t *dst, uint8_t *src1,
                                      uint8_t *diff, int w, int *left,
                                      int *left_top)
{
    /* This is almost identical to add_hfyu_median_pred in huffyuvdsp.h.
     * However the &0xFF on the gradient predictor yields incorrect output
     * for lagarith.
     */
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for (i = 0; i < w; i++) {
        /* The gradient term is deliberately NOT masked to 8 bits. */
        l      = mid_pred(l, src1[i], l + src1[i] - lt) + diff[i];
        lt     = src1[i];
        dst[i] = l;
    }

    *left     = l;
    *left_top = lt;
}
255
256	static void lag_pred_line(LagarithContext l, uint8_t buf,
257	int width, int stride, int line)
258	{
259	int L, TL;
260
261	if (!line) {
262	/* Left prediction only for first line */
263	L = l->llviddsp.add_left_pred(buf, buf, width, 0);
264	} else {
265	/* Left pixel is actually prev_row[width] */
266	L = buf[width - stride - 1];
267
268	if (line == 1) {
269	/* Second line, left predict first pixel, the rest of the line is median predicted
270	* NOTE: In the case of RGB this pixel is top predicted */
271	TL = l->avctx->pix_fmt == AV_PIX_FMT_YUV420P ? buf[-stride] : L;
272	} else {
273	/* Top left is 2 rows back, last pixel */
274	TL = buf[width - (2 * stride) - 1];
275	}
276
277	add_lag_median_prediction(buf, buf - stride, buf,
278	width, &L, &TL);
279	}
280	}
281
282	static void lag_pred_line_yuy2(LagarithContext l, uint8_t buf,
283	int width, int stride, int line,
284	int is_luma)
285	{
286	int L, TL;
287
288	if (!line) {
289	L= buf[0];
290	if (is_luma)
291	buf[0] = 0;
292	l->llviddsp.add_left_pred(buf, buf, width, 0);
293	if (is_luma)
294	buf[0] = L;
295	return;
296	}
297	if (line == 1) {
298	const int HEAD = is_luma ? 4 : 2;
299	int i;
300
301	L = buf[width - stride - 1];
302	TL = buf[HEAD - stride - 1];
303	for (i = 0; i < HEAD; i++) {
304	L += buf[i];
305	buf[i] = L;
306	}
307	for (; i < width; i++) {
308	L = mid_pred(L & 0xFF, buf[i - stride], (L + buf[i - stride] - TL) & 0xFF) + buf[i];
309	TL = buf[i - stride];
310	buf[i] = L;
311	}
312	} else {
313	TL = buf[width - (2 * stride) - 1];
314	L = buf[width - stride - 1];
315	l->llviddsp.add_median_pred(buf, buf - stride, buf, width, &L, &TL);
316	}
317	}
318
319	static int lag_decode_line(LagarithContext l, lag_rac rac,
320	uint8_t *dst, int width, int stride,
321	int esc_count)
322	{
323	int i = 0;
324	int ret = 0;
325
326	if (!esc_count)
327	esc_count = -1;
328
329	/* Output any zeros remaining from the previous run */
330	handle_zeros:
331	if (l->zeros_rem) {
332	int count = FFMIN(l->zeros_rem, width - i);
333	memset(dst + i, 0, count);
334	i += count;
335	l->zeros_rem -= count;
336	}
337
338	while (i < width) {
339	dst[i] = lag_get_rac(rac);
340	ret++;
341
342	if (dst[i])
343	l->zeros = 0;
344	else
345	l->zeros++;
346
347	i++;
348	if (l->zeros == esc_count) {
349	int index = lag_get_rac(rac);
350	ret++;
351
352	l->zeros = 0;
353
354	l->zeros_rem = lag_calc_zero_run(index);
355	goto handle_zeros;
356	}
357	}
358	return ret;
359	}
360
361	static int lag_decode_zero_run_line(LagarithContext l, uint8_t dst,
362	const uint8_t src, const uint8_t src_end,
363	int width, int esc_count)
364	{
365	int i = 0;
366	int count;
367	uint8_t zero_run = 0;
368	const uint8_t *src_start = src;
369	uint8_t mask1 = -(esc_count < 2);
370	uint8_t mask2 = -(esc_count < 3);
371	uint8_t *end = dst + (width - 2);
372
373	avpriv_request_sample(l->avctx, "zero_run_line");
374
375	memset(dst, 0, width);
376
377	output_zeros:
378	if (l->zeros_rem) {
379	count = FFMIN(l->zeros_rem, width - i);
380	if (end - dst < count) {
381	av_log(l->avctx, AV_LOG_ERROR, "Too many zeros remaining.\n");
382	return AVERROR_INVALIDDATA;
383	}
384
385	memset(dst, 0, count);
386	l->zeros_rem -= count;
387	dst += count;
388	}
389
390	while (dst < end) {
391	i = 0;
392	while (!zero_run && dst + i < end) {
393	i++;
394	if (i+2 >= src_end - src)
395	return AVERROR_INVALIDDATA;
396	zero_run =
397	!(src[i] \| (src[i + 1] & mask1) \| (src[i + 2] & mask2));
398	}
399	if (zero_run) {
400	zero_run = 0;
401	i += esc_count;
402	memcpy(dst, src, i);
403	dst += i;
404	l->zeros_rem = lag_calc_zero_run(src[i]);
405
406	src += i + 1;
407	goto output_zeros;
408	} else {
409	memcpy(dst, src, i);
410	src += i;
411	dst += i;
412	}
413	}
414	return src - src_start;
415	}
416
417
418
419	static int lag_decode_arith_plane(LagarithContext l, uint8_t dst,
420	int width, int height, int stride,
421	const uint8_t *src, int src_size)
422	{
423	int i = 0;
424	int read = 0;
425	uint32_t length;
426	uint32_t offset = 1;
427	int esc_count;
428	GetBitContext gb;
429	lag_rac rac;
430	const uint8_t *src_end = src + src_size;
431	int ret;
432
433	rac.avctx = l->avctx;
434	l->zeros = 0;
435
436	if(src_size < 2)
437	return AVERROR_INVALIDDATA;
438
439	esc_count = src[0];
440	if (esc_count < 4) {
441	length = width * height;
442	if(src_size < 5)
443	return AVERROR_INVALIDDATA;
444	if (esc_count && AV_RL32(src + 1) < length) {
445	length = AV_RL32(src + 1);
446	offset += 4;
447	}
448
449	if ((ret = init_get_bits8(&gb, src + offset, src_size - offset)) < 0)
450	return ret;
451
452	if (lag_read_prob_header(&rac, &gb) < 0)
453	return -1;
454
455	ff_lag_rac_init(&rac, &gb, length - stride);
456
457	for (i = 0; i < height; i++)
458	read += lag_decode_line(l, &rac, dst + (i * stride), width,
459	stride, esc_count);
460
461	if (read > length)
462	av_log(l->avctx, AV_LOG_WARNING,
463	"Output more bytes than length (%d of %"PRIu32")\n", read,
464	length);
465	} else if (esc_count < 8) {
466	esc_count -= 4;
467	src ++;
468	src_size --;
469	if (esc_count > 0) {
470	/* Zero run coding only, no range coding. */
471	for (i = 0; i < height; i++) {
472	int res = lag_decode_zero_run_line(l, dst + (i * stride), src,
473	src_end, width, esc_count);
474	if (res < 0)
475	return res;
476	src += res;
477	}
478	} else {
479	if (src_size < width * height)
480	return AVERROR_INVALIDDATA; // buffer not big enough
481	/* Plane is stored uncompressed */
482	for (i = 0; i < height; i++) {
483	memcpy(dst + (i * stride), src, width);
484	src += width;
485	}
486	}
487	} else if (esc_count == 0xff) {
488	/* Plane is a solid run of given value */
489	for (i = 0; i < height; i++)
490	memset(dst + i * stride, src[1], width);
491	/* Do not apply prediction.
492	Note: memset to 0 above, setting first value to src[1]
493	and applying prediction gives the same result. */
494	return 0;
495	} else {
496	av_log(l->avctx, AV_LOG_ERROR,
497	"Invalid zero run escape code! (%#x)\n", esc_count);
498	return -1;
499	}
500
501	if (l->avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
502	for (i = 0; i < height; i++) {
503	lag_pred_line(l, dst, width, stride, i);
504	dst += stride;
505	}
506	} else {
507	for (i = 0; i < height; i++) {
508	lag_pred_line_yuy2(l, dst, width, stride, i,
509	width == l->avctx->width);
510	dst += stride;
511	}
512	}
513
514	return 0;
515	}
516
517	/**
518	* Decode a frame.
519	* @param avctx codec context
520	* @param data output AVFrame
521	* @param data_size size of output data or 0 if no picture is returned
522	* @param avpkt input packet
523	* @return number of consumed bytes on success or negative if decode fails
524	*/
525	static int lag_decode_frame(AVCodecContext *avctx,
526	void data, int got_frame, AVPacket *avpkt)
527	{
528	const uint8_t *buf = avpkt->data;
529	unsigned int buf_size = avpkt->size;
530	LagarithContext *l = avctx->priv_data;
531	ThreadFrame frame = { .f = data };
532	AVFrame *const p = data;
533	uint8_t frametype = 0;
534	uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
535	uint32_t offs[4];
536	uint8_t srcs[4], dst;
537	int i, j, planes = 3;
538	int ret;
539
540	p->key_frame = 1;
541
542	frametype = buf[0];
543
544	offset_gu = AV_RL32(buf + 1);
545	offset_bv = AV_RL32(buf + 5);
546
547	switch (frametype) {
548	case FRAME_SOLID_RGBA:
549	avctx->pix_fmt = AV_PIX_FMT_RGB32;
550	case FRAME_SOLID_GRAY:
551	if (frametype == FRAME_SOLID_GRAY)
552	if (avctx->bits_per_coded_sample == 24) {
553	avctx->pix_fmt = AV_PIX_FMT_RGB24;
554	} else {
555	avctx->pix_fmt = AV_PIX_FMT_0RGB32;
556	planes = 4;
557	}
558
559	if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
560	return ret;
561
562	dst = p->data[0];
563	if (frametype == FRAME_SOLID_RGBA) {
564	for (j = 0; j < avctx->height; j++) {
565	for (i = 0; i < avctx->width; i++)
566	AV_WN32(dst + i * 4, offset_gu);
567	dst += p->linesize[0];
568	}
569	} else {
570	for (j = 0; j < avctx->height; j++) {
571	memset(dst, buf[1], avctx->width * planes);
572	dst += p->linesize[0];
573	}
574	}
575	break;
576	case FRAME_SOLID_COLOR:
577	if (avctx->bits_per_coded_sample == 24) {
578	avctx->pix_fmt = AV_PIX_FMT_RGB24;
579	} else {
580	avctx->pix_fmt = AV_PIX_FMT_RGB32;
581	offset_gu \|= 0xFFU << 24;
582	}
583
584	if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
585	return ret;
586
587	dst = p->data[0];
588	for (j = 0; j < avctx->height; j++) {
589	for (i = 0; i < avctx->width; i++)
590	if (avctx->bits_per_coded_sample == 24) {
591	AV_WB24(dst + i * 3, offset_gu);
592	} else {
593	AV_WN32(dst + i * 4, offset_gu);
594	}
595	dst += p->linesize[0];
596	}
597	break;
598	case FRAME_ARITH_RGBA:
599	avctx->pix_fmt = AV_PIX_FMT_RGB32;
600	planes = 4;
601	offset_ry += 4;
602	offs[3] = AV_RL32(buf + 9);
603	case FRAME_ARITH_RGB24:
604	case FRAME_U_RGB24:
605	if (frametype == FRAME_ARITH_RGB24 \|\| frametype == FRAME_U_RGB24)
606	avctx->pix_fmt = AV_PIX_FMT_RGB24;
607
608	if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
609	return ret;
610
611	offs[0] = offset_bv;
612	offs[1] = offset_gu;
613	offs[2] = offset_ry;
614
615	l->rgb_stride = FFALIGN(avctx->width, 16);
616	av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
617	l->rgb_stride * avctx->height * planes + 1);
618	if (!l->rgb_planes) {
619	av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
620	return AVERROR(ENOMEM);
621	}
622	for (i = 0; i < planes; i++)
623	srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
624	for (i = 0; i < planes; i++)
625	if (buf_size <= offs[i]) {
626	av_log(avctx, AV_LOG_ERROR,
627	"Invalid frame offsets\n");
628	return AVERROR_INVALIDDATA;
629	}
630
631	for (i = 0; i < planes; i++)
632	lag_decode_arith_plane(l, srcs[i],
633	avctx->width, avctx->height,
634	-l->rgb_stride, buf + offs[i],
635	buf_size - offs[i]);
636	dst = p->data[0];
637	for (i = 0; i < planes; i++)
638	srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
639	for (j = 0; j < avctx->height; j++) {
640	for (i = 0; i < avctx->width; i++) {
641	uint8_t r, g, b, a;
642	r = srcs[0][i];
643	g = srcs[1][i];
644	b = srcs[2][i];
645	r += g;
646	b += g;
647	if (frametype == FRAME_ARITH_RGBA) {
648	a = srcs[3][i];
649	AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
650	} else {
651	dst[i * 3 + 0] = r;
652	dst[i * 3 + 1] = g;
653	dst[i * 3 + 2] = b;
654	}
655	}
656	dst += p->linesize[0];
657	for (i = 0; i < planes; i++)
658	srcs[i] += l->rgb_stride;
659	}
660	break;
661	case FRAME_ARITH_YUY2:
662	avctx->pix_fmt = AV_PIX_FMT_YUV422P;
663
664	if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
665	return ret;
666
667	if (offset_ry >= buf_size \|\|
668	offset_gu >= buf_size \|\|
669	offset_bv >= buf_size) {
670	av_log(avctx, AV_LOG_ERROR,
671	"Invalid frame offsets\n");
672	return AVERROR_INVALIDDATA;
673	}
674
675	lag_decode_arith_plane(l, p->data[0], avctx->width, avctx->height,
676	p->linesize[0], buf + offset_ry,
677	buf_size - offset_ry);
678	lag_decode_arith_plane(l, p->data[1], (avctx->width + 1) / 2,
679	avctx->height, p->linesize[1],
680	buf + offset_gu, buf_size - offset_gu);
681	lag_decode_arith_plane(l, p->data[2], (avctx->width + 1) / 2,
682	avctx->height, p->linesize[2],
683	buf + offset_bv, buf_size - offset_bv);
684	break;
685	case FRAME_ARITH_YV12:
686	avctx->pix_fmt = AV_PIX_FMT_YUV420P;
687
688	if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
689	return ret;
690	if (buf_size <= offset_ry \|\| buf_size <= offset_gu \|\| buf_size <= offset_bv) {
691	return AVERROR_INVALIDDATA;
692	}
693
694	if (offset_ry >= buf_size \|\|
695	offset_gu >= buf_size \|\|
696	offset_bv >= buf_size) {
697	av_log(avctx, AV_LOG_ERROR,
698	"Invalid frame offsets\n");
699	return AVERROR_INVALIDDATA;
700	}
701
702	lag_decode_arith_plane(l, p->data[0], avctx->width, avctx->height,
703	p->linesize[0], buf + offset_ry,
704	buf_size - offset_ry);
705	lag_decode_arith_plane(l, p->data[2], (avctx->width + 1) / 2,
706	(avctx->height + 1) / 2, p->linesize[2],
707	buf + offset_gu, buf_size - offset_gu);
708	lag_decode_arith_plane(l, p->data[1], (avctx->width + 1) / 2,
709	(avctx->height + 1) / 2, p->linesize[1],
710	buf + offset_bv, buf_size - offset_bv);
711	break;
712	default:
713	av_log(avctx, AV_LOG_ERROR,
714	"Unsupported Lagarith frame type: %#"PRIx8"\n", frametype);
715	return AVERROR_PATCHWELCOME;
716	}
717
718	*got_frame = 1;
719
720	return buf_size;
721	}
722
723	static av_cold int lag_decode_init(AVCodecContext *avctx)
724	{
725	LagarithContext *l = avctx->priv_data;
726	l->avctx = avctx;
727
728	ff_llviddsp_init(&l->llviddsp);
729
730	return 0;
731	}
732
#if HAVE_THREADS
/**
 * Per-thread initialisation: make the private context of this thread copy
 * refer to the codec context it is attached to.
 *
 * @param avctx codec context of the thread copy
 * @return always 0
 */
static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
{
    LagarithContext *const ctx = avctx->priv_data;

    ctx->avctx = avctx;
    return 0;
}
#endif
742
743	static av_cold int lag_decode_end(AVCodecContext *avctx)
744	{
745	LagarithContext *l = avctx->priv_data;
746
747	av_freep(&l->rgb_planes);
748
749	return 0;
750	}
751
752	AVCodec ff_lagarith_decoder = {
753	.name = "lagarith",
754	.long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
755	.type = AVMEDIA_TYPE_VIDEO,
756	.id = AV_CODEC_ID_LAGARITH,
757	.priv_data_size = sizeof(LagarithContext),
758	.init = lag_decode_init,
759	.init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
760	.close = lag_decode_end,
761	.decode = lag_decode_frame,
762	.capabilities = AV_CODEC_CAP_DR1 \| AV_CODEC_CAP_FRAME_THREADS,
763	};
764