blob: 53ac0d7224521315ea6137c2575f62c23e1a8980
1 | /* |
2 | * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include "libavutil/avassert.h" |
22 | |
/*
 * Chroma layout helpers for the current instantiation of this template.
 * SS_W and SS_H are not defined in this file; they are expected to be set
 * before this point.
 *
 *   ss   numeric layout tag (444, 422 or 420) pasted into function names
 *   avg  rounded mean of the samples that share one chroma sample; the
 *        arguments a layout does not need are never expanded, so callers may
 *        pass names that are not declared for that layout
 */
#undef ss
#undef avg

#if SS_W == 0
/* no horizontal subsampling: one sample per chroma sample */
#define ss 444
#define avg(p00, p01, p10, p11) (p00)
#elif SS_H == 0
/* horizontal subsampling only: mean of a horizontal pair */
#define ss 422
#define avg(p00, p01, p10, p11) (((p00) + (p01) + 1) >> 1)
#else
/* horizontal and vertical subsampling: mean of a 2x2 block */
#define ss 420
#define avg(p00, p01, p10, p11) (((p00) + (p01) + (p10) + (p11) + 2) >> 2)
#endif
36 | |
/*
 * fn(name) builds the per-instantiation symbol name_<ss>p<BIT_DEPTH>_c.
 * fn2 exists only so that BIT_DEPTH and ss are macro-expanded before fn3
 * pastes the pieces together.
 */
#undef fn3
#undef fn2
#undef fn
#define fn3(base, depth, layout) base##_##layout##p##depth##_c
#define fn2(base, depth, layout) fn3(base, depth, layout)
#define fn(base) fn2(base, BIT_DEPTH, ss)
43 | |
/*
 * Sample type and matching clip helper for the current BIT_DEPTH:
 * 8-bit samples are stored as uint8_t, every other depth as uint16_t.
 */
#undef av_clip_pixel
#undef pixel
#if BIT_DEPTH != 8
#define pixel uint16_t
#define av_clip_pixel(val) av_clip_uintp2(val, BIT_DEPTH)
#else
#define pixel uint8_t
#define av_clip_pixel(val) av_clip_uint8(val)
#endif
53 | |
54 | static void fn(yuv2rgb)(int16_t *rgb[3], ptrdiff_t rgb_stride, |
55 | uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], |
56 | int w, int h, const int16_t yuv2rgb_coeffs[3][3][8], |
57 | const int16_t yuv_offset[8]) |
58 | { |
59 | pixel **yuv = (pixel **) _yuv; |
60 | const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2]; |
61 | int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2]; |
62 | int y, x; |
63 | int cy = yuv2rgb_coeffs[0][0][0]; |
64 | int crv = yuv2rgb_coeffs[0][2][0]; |
65 | int cgu = yuv2rgb_coeffs[1][1][0]; |
66 | int cgv = yuv2rgb_coeffs[1][2][0]; |
67 | int cbu = yuv2rgb_coeffs[2][1][0]; |
68 | const int sh = BIT_DEPTH - 1, rnd = 1 << (sh - 1); |
69 | const int uv_offset = 128 << (BIT_DEPTH - 8); |
70 | |
71 | av_assert2(yuv2rgb_coeffs[0][1][0] == 0); |
72 | av_assert2(yuv2rgb_coeffs[2][2][0] == 0); |
73 | av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy); |
74 | |
75 | w = AV_CEIL_RSHIFT(w, SS_W); |
76 | h = AV_CEIL_RSHIFT(h, SS_H); |
77 | for (y = 0; y < h; y++) { |
78 | for (x = 0; x < w; x++) { |
79 | int y00 = yuv0[x << SS_W] - yuv_offset[0]; |
80 | #if SS_W == 1 |
81 | int y01 = yuv0[2 * x + 1] - yuv_offset[0]; |
82 | #if SS_H == 1 |
83 | int y10 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x] - yuv_offset[0]; |
84 | int y11 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x + 1] - yuv_offset[0]; |
85 | #endif |
86 | #endif |
87 | int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset; |
88 | |
89 | rgb0[x << SS_W] = av_clip_int16((y00 * cy + crv * v + rnd) >> sh); |
90 | #if SS_W == 1 |
91 | rgb0[2 * x + 1] = av_clip_int16((y01 * cy + crv * v + rnd) >> sh); |
92 | #if SS_H == 1 |
93 | rgb0[2 * x + rgb_stride] = av_clip_int16((y10 * cy + crv * v + rnd) >> sh); |
94 | rgb0[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + crv * v + rnd) >> sh); |
95 | #endif |
96 | #endif |
97 | |
98 | rgb1[x << SS_W] = av_clip_int16((y00 * cy + cgu * u + |
99 | cgv * v + rnd) >> sh); |
100 | #if SS_W == 1 |
101 | rgb1[2 * x + 1] = av_clip_int16((y01 * cy + cgu * u + |
102 | cgv * v + rnd) >> sh); |
103 | #if SS_H == 1 |
104 | rgb1[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cgu * u + |
105 | cgv * v + rnd) >> sh); |
106 | rgb1[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cgu * u + |
107 | cgv * v + rnd) >> sh); |
108 | #endif |
109 | #endif |
110 | |
111 | rgb2[x << SS_W] = av_clip_int16((y00 * cy + cbu * u + rnd) >> sh); |
112 | #if SS_W == 1 |
113 | rgb2[2 * x + 1] = av_clip_int16((y01 * cy + cbu * u + rnd) >> sh); |
114 | #if SS_H == 1 |
115 | rgb2[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cbu * u + rnd) >> sh); |
116 | rgb2[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cbu * u + rnd) >> sh); |
117 | #endif |
118 | #endif |
119 | } |
120 | |
121 | yuv0 += (yuv_stride[0] * (1 << SS_H)) / sizeof(pixel); |
122 | yuv1 += yuv_stride[1] / sizeof(pixel); |
123 | yuv2 += yuv_stride[2] / sizeof(pixel); |
124 | rgb0 += rgb_stride * (1 << SS_H); |
125 | rgb1 += rgb_stride * (1 << SS_H); |
126 | rgb2 += rgb_stride * (1 << SS_H); |
127 | } |
128 | } |
129 | |
130 | static void fn(rgb2yuv)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], |
131 | int16_t *rgb[3], ptrdiff_t s, |
132 | int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], |
133 | const int16_t yuv_offset[8]) |
134 | { |
135 | pixel **yuv = (pixel **) _yuv; |
136 | pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2]; |
137 | const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2]; |
138 | int y, x; |
139 | const int sh = 29 - BIT_DEPTH; |
140 | const int rnd = 1 << (sh - 1); |
141 | int cry = rgb2yuv_coeffs[0][0][0]; |
142 | int cgy = rgb2yuv_coeffs[0][1][0]; |
143 | int cby = rgb2yuv_coeffs[0][2][0]; |
144 | int cru = rgb2yuv_coeffs[1][0][0]; |
145 | int cgu = rgb2yuv_coeffs[1][1][0]; |
146 | int cburv = rgb2yuv_coeffs[1][2][0]; |
147 | int cgv = rgb2yuv_coeffs[2][1][0]; |
148 | int cbv = rgb2yuv_coeffs[2][2][0]; |
149 | ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel); |
150 | const int uv_offset = 128 << (BIT_DEPTH - 8); |
151 | |
152 | av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]); |
153 | w = AV_CEIL_RSHIFT(w, SS_W); |
154 | h = AV_CEIL_RSHIFT(h, SS_H); |
155 | for (y = 0; y < h; y++) { |
156 | for (x = 0; x < w; x++) { |
157 | int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W]; |
158 | #if SS_W == 1 |
159 | int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1]; |
160 | #if SS_H == 1 |
161 | int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s]; |
162 | int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s]; |
163 | #endif |
164 | #endif |
165 | |
166 | yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] + |
167 | ((r00 * cry + g00 * cgy + |
168 | b00 * cby + rnd) >> sh)); |
169 | #if SS_W == 1 |
170 | yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] + |
171 | ((r01 * cry + g01 * cgy + |
172 | b01 * cby + rnd) >> sh)); |
173 | #if SS_H == 1 |
174 | yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] + |
175 | ((r10 * cry + g10 * cgy + |
176 | b10 * cby + rnd) >> sh)); |
177 | yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] + |
178 | ((r11 * cry + g11 * cgy + |
179 | b11 * cby + rnd) >> sh)); |
180 | #endif |
181 | #endif |
182 | |
183 | yuv1[x] = av_clip_pixel(uv_offset + |
184 | ((avg(r00, r01, r10, r11) * cru + |
185 | avg(g00, g01, g10, g11) * cgu + |
186 | avg(b00, b01, b10, b11) * cburv + rnd) >> sh)); |
187 | yuv2[x] = av_clip_pixel(uv_offset + |
188 | ((avg(r00, r01, r10, r11) * cburv + |
189 | avg(g00, g01, g10, g11) * cgv + |
190 | avg(b00, b01, b10, b11) * cbv + rnd) >> sh)); |
191 | } |
192 | |
193 | yuv0 += s0 * (1 << SS_H); |
194 | yuv1 += yuv_stride[1] / sizeof(pixel); |
195 | yuv2 += yuv_stride[2] / sizeof(pixel); |
196 | rgb0 += s * (1 << SS_H); |
197 | rgb1 += s * (1 << SS_H); |
198 | rgb2 += s * (1 << SS_H); |
199 | } |
200 | } |
201 | |
202 | /* floyd-steinberg dithering - for any mid-top pixel A in a 3x2 block of pixels: |
203 | * 1 A 2 |
204 | * 3 4 5 |
205 | * the rounding error is distributed over the neighbouring pixels: |
206 | * 2: 7/16th, 3: 3/16th, 4: 5/16th and 5: 1/16th |
207 | */ |
208 | static void fn(rgb2yuv_fsb)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3], |
209 | int16_t *rgb[3], ptrdiff_t s, |
210 | int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], |
211 | const int16_t yuv_offset[8], |
212 | int *rnd_scratch[3][2]) |
213 | { |
214 | pixel **yuv = (pixel **) _yuv; |
215 | pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2]; |
216 | const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2]; |
217 | int y, x; |
218 | const int sh = 29 - BIT_DEPTH; |
219 | const int rnd = 1 << (sh - 1); |
220 | int cry = rgb2yuv_coeffs[0][0][0]; |
221 | int cgy = rgb2yuv_coeffs[0][1][0]; |
222 | int cby = rgb2yuv_coeffs[0][2][0]; |
223 | int cru = rgb2yuv_coeffs[1][0][0]; |
224 | int cgu = rgb2yuv_coeffs[1][1][0]; |
225 | int cburv = rgb2yuv_coeffs[1][2][0]; |
226 | int cgv = rgb2yuv_coeffs[2][1][0]; |
227 | int cbv = rgb2yuv_coeffs[2][2][0]; |
228 | ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel); |
229 | const int uv_offset = 128 << (BIT_DEPTH - 8); |
230 | unsigned mask = (1 << sh) - 1; |
231 | |
232 | for (x = 0; x < w; x++) { |
233 | rnd_scratch[0][0][x] = |
234 | rnd_scratch[0][1][x] = rnd; |
235 | } |
236 | av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]); |
237 | w = AV_CEIL_RSHIFT(w, SS_W); |
238 | h = AV_CEIL_RSHIFT(h, SS_H); |
239 | for (x = 0; x < w; x++) { |
240 | rnd_scratch[1][0][x] = |
241 | rnd_scratch[1][1][x] = |
242 | rnd_scratch[2][0][x] = |
243 | rnd_scratch[2][1][x] = rnd; |
244 | } |
245 | for (y = 0; y < h; y++) { |
246 | for (x = 0; x < w; x++) { |
247 | int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W]; |
248 | int y00; |
249 | #if SS_W == 1 |
250 | int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1]; |
251 | int y01; |
252 | #if SS_H == 1 |
253 | int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s]; |
254 | int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s]; |
255 | int y10, y11; |
256 | #endif |
257 | #endif |
258 | int u, v, diff; |
259 | |
260 | y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !SS_H][x << SS_W]; |
261 | diff = (y00 & mask) - rnd; |
262 | yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] + (y00 >> sh)); |
263 | rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 1] += (diff * 7 + 8) >> 4; |
264 | rnd_scratch[0][!(y & !SS_H)][(x << SS_W) - 1] += (diff * 3 + 8) >> 4; |
265 | rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 0] += (diff * 5 + 8) >> 4; |
266 | rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 1] += (diff * 1 + 8) >> 4; |
267 | rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 0] = rnd; |
268 | #if SS_W == 1 |
269 | y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !SS_H][x * 2 + 1]; |
270 | diff = (y01 & mask) - rnd; |
271 | yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] + (y01 >> sh)); |
272 | rnd_scratch[0][ (y & !SS_H)][x * 2 + 2] += (diff * 7 + 8) >> 4; |
273 | rnd_scratch[0][!(y & !SS_H)][x * 2 + 0] += (diff * 3 + 8) >> 4; |
274 | rnd_scratch[0][!(y & !SS_H)][x * 2 + 1] += (diff * 5 + 8) >> 4; |
275 | rnd_scratch[0][!(y & !SS_H)][x * 2 + 2] += (diff * 1 + 8) >> 4; |
276 | rnd_scratch[0][ (y & !SS_H)][x * 2 + 1] = rnd; |
277 | #if SS_H == 1 |
278 | y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0]; |
279 | diff = (y10 & mask) - rnd; |
280 | yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] + (y10 >> sh)); |
281 | rnd_scratch[0][1][x * 2 + 1] += (diff * 7 + 8) >> 4; |
282 | rnd_scratch[0][0][x * 2 - 1] += (diff * 3 + 8) >> 4; |
283 | rnd_scratch[0][0][x * 2 + 0] += (diff * 5 + 8) >> 4; |
284 | rnd_scratch[0][0][x * 2 + 1] += (diff * 1 + 8) >> 4; |
285 | rnd_scratch[0][1][x * 2 + 0] = rnd; |
286 | |
287 | y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1]; |
288 | diff = (y11 & mask) - rnd; |
289 | yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] + (y11 >> sh)); |
290 | rnd_scratch[0][1][x * 2 + 2] += (diff * 7 + 8) >> 4; |
291 | rnd_scratch[0][0][x * 2 + 0] += (diff * 3 + 8) >> 4; |
292 | rnd_scratch[0][0][x * 2 + 1] += (diff * 5 + 8) >> 4; |
293 | rnd_scratch[0][0][x * 2 + 2] += (diff * 1 + 8) >> 4; |
294 | rnd_scratch[0][1][x * 2 + 1] = rnd; |
295 | #endif |
296 | #endif |
297 | |
298 | u = avg(r00, r01, r10, r11) * cru + |
299 | avg(g00, g01, g10, g11) * cgu + |
300 | avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x]; |
301 | diff = (u & mask) - rnd; |
302 | yuv1[x] = av_clip_pixel(uv_offset + (u >> sh)); |
303 | rnd_scratch[1][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4; |
304 | rnd_scratch[1][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4; |
305 | rnd_scratch[1][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4; |
306 | rnd_scratch[1][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4; |
307 | rnd_scratch[1][ (y & 1)][x + 0] = rnd; |
308 | |
309 | v = avg(r00, r01, r10, r11) * cburv + |
310 | avg(g00, g01, g10, g11) * cgv + |
311 | avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x]; |
312 | diff = (v & mask) - rnd; |
313 | yuv2[x] = av_clip_pixel(uv_offset + (v >> sh)); |
314 | rnd_scratch[2][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4; |
315 | rnd_scratch[2][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4; |
316 | rnd_scratch[2][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4; |
317 | rnd_scratch[2][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4; |
318 | rnd_scratch[2][ (y & 1)][x + 0] = rnd; |
319 | } |
320 | |
321 | yuv0 += s0 * (1 << SS_H); |
322 | yuv1 += yuv_stride[1] / sizeof(pixel); |
323 | yuv2 += yuv_stride[2] / sizeof(pixel); |
324 | rgb0 += s * (1 << SS_H); |
325 | rgb1 += s * (1 << SS_H); |
326 | rgb2 += s * (1 << SS_H); |
327 | } |
328 | } |
329 | |
330 | #undef IN_BIT_DEPTH |
331 | #undef OUT_BIT_DEPTH |
332 | #define OUT_BIT_DEPTH BIT_DEPTH |
333 | #define IN_BIT_DEPTH 8 |
334 | #include "colorspacedsp_yuv2yuv_template.c" |
335 | |
336 | #undef IN_BIT_DEPTH |
337 | #define IN_BIT_DEPTH 10 |
338 | #include "colorspacedsp_yuv2yuv_template.c" |
339 | |
340 | #undef IN_BIT_DEPTH |
341 | #define IN_BIT_DEPTH 12 |
342 | #include "colorspacedsp_yuv2yuv_template.c" |
343 |