blob: 25f1a812021eb32e9035bae2d0147961c019c75b
1 | /* |
2 | * HEVC video decoder |
3 | * |
4 | * Copyright (C) 2012 - 2013 Guillaume Martres |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | #include "get_bits.h" |
24 | #include "hevcdec.h" |
25 | |
26 | #include "bit_depth_template.c" |
27 | #include "hevcdsp.h" |
28 | |
29 | static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height, |
30 | GetBitContext *gb, int pcm_bit_depth) |
31 | { |
32 | int x, y; |
33 | pixel *dst = (pixel *)_dst; |
34 | |
35 | stride /= sizeof(pixel); |
36 | |
37 | for (y = 0; y < height; y++) { |
38 | for (x = 0; x < width; x++) |
39 | dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth); |
40 | dst += stride; |
41 | } |
42 | } |
43 | |
44 | static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res, |
45 | ptrdiff_t stride, int size) |
46 | { |
47 | int x, y; |
48 | pixel *dst = (pixel *)_dst; |
49 | |
50 | stride /= sizeof(pixel); |
51 | |
52 | for (y = 0; y < size; y++) { |
53 | for (x = 0; x < size; x++) { |
54 | dst[x] = av_clip_pixel(dst[x] + *res); |
55 | res++; |
56 | } |
57 | dst += stride; |
58 | } |
59 | } |
60 | |
/* Fixed-size entry point: add_residual() with size = 4. */
static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
                                  ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 4);
}
66 | |
/* Fixed-size entry point: add_residual() with size = 8. */
static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
                                  ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 8);
}
72 | |
/* Fixed-size entry point: add_residual() with size = 16. */
static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 16);
}
78 | |
/* Fixed-size entry point: add_residual() with size = 32. */
static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
                                    ptrdiff_t stride)
{
    FUNC(add_residual)(_dst, res, stride, 32);
}
84 | |
85 | static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode) |
86 | { |
87 | int16_t *coeffs = (int16_t *) _coeffs; |
88 | int x, y; |
89 | int size = 1 << log2_size; |
90 | |
91 | if (mode) { |
92 | coeffs += size; |
93 | for (y = 0; y < size - 1; y++) { |
94 | for (x = 0; x < size; x++) |
95 | coeffs[x] += coeffs[x - size]; |
96 | coeffs += size; |
97 | } |
98 | } else { |
99 | for (y = 0; y < size; y++) { |
100 | for (x = 1; x < size; x++) |
101 | coeffs[x] += coeffs[x - 1]; |
102 | coeffs += size; |
103 | } |
104 | } |
105 | } |
106 | |
107 | static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) |
108 | { |
109 | int shift = 15 - BIT_DEPTH - log2_size; |
110 | int x, y; |
111 | int size = 1 << log2_size; |
112 | |
113 | if (shift > 0) { |
114 | int offset = 1 << (shift - 1); |
115 | for (y = 0; y < size; y++) { |
116 | for (x = 0; x < size; x++) { |
117 | *coeffs = (*coeffs + offset) >> shift; |
118 | coeffs++; |
119 | } |
120 | } |
121 | } else { |
122 | for (y = 0; y < size; y++) { |
123 | for (x = 0; x < size; x++) { |
124 | *coeffs = *coeffs << -shift; |
125 | coeffs++; |
126 | } |
127 | } |
128 | } |
129 | } |
130 | |
/*
 * Assignment policies handed to the TR_* macros through their "assign"
 * argument:
 *   SET   stores the value unchanged;
 *   SCALE adds the rounding term and shifts right, then clips to int16.
 *         It uses the variables named "add" and "shift" that must be in
 *         scope at the point of expansion.
 */
#define SET(dst, x)   (dst) = (x)
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
133 | |
/*
 * One 4-point pass of the transform used by transform_4x4_luma().
 *
 * Reads src[0], src[step], src[2 * step], src[3 * step] and stores the four
 * results through assign() at the same offsets of dst.
 *
 * All inputs needed by outputs 0, 1 and 3 are captured in c0..c3 first, and
 * output 2 (the only one that reads src directly) is stored before any other
 * output, so the macro may be used with dst == src.
 */
#define TR_4x4_LUMA(dst, src, step, assign)                             \
    do {                                                                \
        int c0 = src[0 * step] + src[2 * step];                         \
        int c1 = src[2 * step] + src[3 * step];                         \
        int c2 = src[0 * step] - src[3 * step];                         \
        int c3 = 74 * src[1 * step];                                    \
                                                                        \
        assign(dst[2 * step], 74 * (src[0 * step] -                     \
                                    src[2 * step] +                     \
                                    src[3 * step]));                    \
        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
    } while (0)
148 | |
149 | static void FUNC(transform_4x4_luma)(int16_t *coeffs) |
150 | { |
151 | int i; |
152 | int shift = 7; |
153 | int add = 1 << (shift - 1); |
154 | int16_t *src = coeffs; |
155 | |
156 | for (i = 0; i < 4; i++) { |
157 | TR_4x4_LUMA(src, src, 4, SCALE); |
158 | src++; |
159 | } |
160 | |
161 | shift = 20 - BIT_DEPTH; |
162 | add = 1 << (shift - 1); |
163 | for (i = 0; i < 4; i++) { |
164 | TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); |
165 | coeffs += 4; |
166 | } |
167 | } |
168 | |
169 | #undef TR_4x4_LUMA |
170 | |
/*
 * 4-point butterfly: reads src[0..3 * sstep], writes dst[0..3 * dstep]
 * through assign().
 *
 * e0/e1 combine the even inputs, o0/o1 the odd inputs; all four are computed
 * before the first store, so dst may alias src. The "end" argument is
 * accepted for signature parity with the larger TR_* macros but is not used.
 */
#define TR_4(dst, src, dstep, sstep, assign, end)                 \
    do {                                                          \
        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
                                                                  \
        assign(dst[0 * dstep], e0 + o0);                          \
        assign(dst[1 * dstep], e1 + o1);                          \
        assign(dst[2 * dstep], e1 - o1);                          \
        assign(dst[3 * dstep], e0 - o0);                          \
    } while (0)
183 | |
/*
 * 8-point stage built on TR_4.
 *
 * o_8[] accumulates the odd-indexed inputs (j = 1, 3, ... below "end")
 * weighted by rows 4 * j of the "transform" table; e_8[] is the TR_4 result
 * over the even-indexed inputs (source step doubled). Outputs i and 7 - i are
 * the sum and difference of the two halves. Inputs at index >= end are not
 * read by the odd part.
 */
#define TR_8(dst, src, dstep, sstep, assign, end)                 \
    do {                                                          \
        int i, j;                                                 \
        int e_8[4];                                               \
        int o_8[4] = { 0 };                                       \
        for (i = 0; i < 4; i++)                                   \
            for (j = 1; j < end; j += 2)                          \
                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                     \
                                                                  \
        for (i = 0; i < 4; i++) {                                 \
            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
        }                                                         \
    } while (0)
199 | |
/*
 * 16-point stage built on TR_8.
 *
 * o_16[] accumulates the odd-indexed inputs below "end" weighted by rows
 * 2 * j of the "transform" table; e_16[] is the TR_8 result over the
 * even-indexed inputs, always evaluated with end = 8. Outputs i and 15 - i
 * are the sum and difference of the two halves.
 */
#define TR_16(dst, src, dstep, sstep, assign, end)                \
    do {                                                          \
        int i, j;                                                 \
        int e_16[8];                                              \
        int o_16[8] = { 0 };                                      \
        for (i = 0; i < 8; i++)                                   \
            for (j = 1; j < end; j += 2)                          \
                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                    \
                                                                  \
        for (i = 0; i < 8; i++) {                                 \
            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
        }                                                         \
    } while (0)
215 | |
/*
 * 32-point stage built on TR_16.
 *
 * o_32[] accumulates the odd-indexed inputs below "end" weighted by rows j
 * of the "transform" table; e_32[] is the TR_16 result over the even-indexed
 * inputs, evaluated with end / 2. Outputs i and 31 - i are the sum and
 * difference of the two halves.
 */
#define TR_32(dst, src, dstep, sstep, assign, end)                \
    do {                                                          \
        int i, j;                                                 \
        int e_32[16];                                             \
        int o_32[16] = { 0 };                                     \
        for (i = 0; i < 16; i++)                                  \
            for (j = 1; j < end; j += 2)                          \
                o_32[i] += transform[j][i] * src[j * sstep];      \
        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2);             \
                                                                  \
        for (i = 0; i < 16; i++) {                                \
            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
        }                                                         \
    } while (0)
231 | |
/*
 * Local limits declared at the top of each idct_HxH body (see IDCT()).
 * Both are derived from the function's col_limit argument and capped at H:
 *   limit2 - "end" value handed to the first pass;
 *   limit  - "end" value handed to the second pass.
 * The 4x4 variant declares only limit2: TR_4 ignores its "end" argument, so
 * the "limit" token never survives macro expansion for H = 4.
 */
#define IDCT_VAR4(H)                                              \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR8(H)                                              \
    int limit  = FFMIN(col_limit, H);                             \
    int limit2 = FFMIN(col_limit + 4, H)
#define IDCT_VAR16(H) IDCT_VAR8(H)
#define IDCT_VAR32(H) IDCT_VAR8(H)
239 | |
/*
 * Generate idct_HxH(coeffs, col_limit): an in-place two-pass transform of an
 * H x H coefficient block using TR_H.
 *
 * Pass 1 walks the H columns (element step H) with shift 7 and passes limit2
 * as the "end" bound; while limit2 is below H it is lowered by 4 after
 * columns 4, 8, 12, ... (i % 4 == 0, i != 0).
 * Pass 2 walks the H rows (element step 1) with shift 20 - BIT_DEPTH and
 * passes limit as the "end" bound.
 * "shift" and "add" are the variables SCALE() reads.
 */
#define IDCT(H)                                                   \
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
                                        int col_limit)            \
{                                                                 \
    int i;                                                        \
    int shift = 7;                                                \
    int add = 1 << (shift - 1);                                   \
    int16_t *src = coeffs;                                        \
    IDCT_VAR ## H(H);                                             \
                                                                  \
    for (i = 0; i < H; i++) {                                     \
        TR_ ## H(src, src, H, H, SCALE, limit2);                  \
        if (limit2 < H && i%4 == 0 && !!i)                        \
            limit2 -= 4;                                          \
        src++;                                                    \
    }                                                             \
                                                                  \
    shift = 20 - BIT_DEPTH;                                       \
    add = 1 << (shift - 1);                                       \
    for (i = 0; i < H; i++) {                                     \
        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);             \
        coeffs += H;                                              \
    }                                                             \
}
264 | |
/*
 * Generate idct_HxH_dc(coeffs): overwrite all H * H coefficients with a
 * single value derived from coeffs[0] only.
 *
 * The value is ((coeffs[0] + 1) >> 1), then rounded and shifted right by
 * 14 - BIT_DEPTH. It is computed before the fill loop starts, so overwriting
 * coeffs[0] inside the loop is harmless.
 */
#define IDCT_DC(H)                                                \
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
{                                                                 \
    int i, j;                                                     \
    int shift = 14 - BIT_DEPTH;                                   \
    int add = 1 << (shift - 1);                                   \
    int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;          \
                                                                  \
    for (j = 0; j < H; j++) {                                     \
        for (i = 0; i < H; i++) {                                 \
            coeffs[i + j * H] = coeff;                            \
        }                                                         \
    }                                                             \
}
279 | |
/* Emit idct_HxH for H = 4, 8, 16 and 32. */
IDCT( 4)
IDCT( 8)
IDCT(16)
IDCT(32)

/* Emit idct_HxH_dc for H = 4, 8, 16 and 32. */
IDCT_DC( 4)
IDCT_DC( 8)
IDCT_DC(16)
IDCT_DC(32)

/* The transform helper macros are not needed past this point. */
#undef TR_4
#undef TR_8
#undef TR_16
#undef TR_32

#undef SET
#undef SCALE
297 | |
298 | static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src, |
299 | ptrdiff_t stride_dst, ptrdiff_t stride_src, |
300 | int16_t *sao_offset_val, int sao_left_class, |
301 | int width, int height) |
302 | { |
303 | pixel *dst = (pixel *)_dst; |
304 | pixel *src = (pixel *)_src; |
305 | int offset_table[32] = { 0 }; |
306 | int k, y, x; |
307 | int shift = BIT_DEPTH - 5; |
308 | |
309 | stride_dst /= sizeof(pixel); |
310 | stride_src /= sizeof(pixel); |
311 | |
312 | for (k = 0; k < 4; k++) |
313 | offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; |
314 | for (y = 0; y < height; y++) { |
315 | for (x = 0; x < width; x++) |
316 | dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); |
317 | dst += stride_dst; |
318 | src += stride_src; |
319 | } |
320 | } |
321 | |
322 | #define CMP(a, b) (((a) > (b)) - ((a) < (b))) |
323 | |
324 | static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, |
325 | int eo, int width, int height) { |
326 | |
327 | static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; |
328 | static const int8_t pos[4][2][2] = { |
329 | { { -1, 0 }, { 1, 0 } }, // horizontal |
330 | { { 0, -1 }, { 0, 1 } }, // vertical |
331 | { { -1, -1 }, { 1, 1 } }, // 45 degree |
332 | { { 1, -1 }, { -1, 1 } }, // 135 degree |
333 | }; |
334 | pixel *dst = (pixel *)_dst; |
335 | pixel *src = (pixel *)_src; |
336 | int a_stride, b_stride; |
337 | int x, y; |
338 | ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel); |
339 | stride_dst /= sizeof(pixel); |
340 | |
341 | a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; |
342 | b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; |
343 | for (y = 0; y < height; y++) { |
344 | for (x = 0; x < width; x++) { |
345 | int diff0 = CMP(src[x], src[x + a_stride]); |
346 | int diff1 = CMP(src[x], src[x + b_stride]); |
347 | int offset_val = edge_idx[2 + diff0 + diff1]; |
348 | dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); |
349 | } |
350 | src += stride_src; |
351 | dst += stride_dst; |
352 | } |
353 | } |
354 | |
355 | static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src, |
356 | ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, |
357 | int *borders, int _width, int _height, |
358 | int c_idx, uint8_t *vert_edge, |
359 | uint8_t *horiz_edge, uint8_t *diag_edge) |
360 | { |
361 | int x, y; |
362 | pixel *dst = (pixel *)_dst; |
363 | pixel *src = (pixel *)_src; |
364 | int16_t *sao_offset_val = sao->offset_val[c_idx]; |
365 | int sao_eo_class = sao->eo_class[c_idx]; |
366 | int init_x = 0, width = _width, height = _height; |
367 | |
368 | stride_dst /= sizeof(pixel); |
369 | stride_src /= sizeof(pixel); |
370 | |
371 | if (sao_eo_class != SAO_EO_VERT) { |
372 | if (borders[0]) { |
373 | int offset_val = sao_offset_val[0]; |
374 | for (y = 0; y < height; y++) { |
375 | dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); |
376 | } |
377 | init_x = 1; |
378 | } |
379 | if (borders[2]) { |
380 | int offset_val = sao_offset_val[0]; |
381 | int offset = width - 1; |
382 | for (x = 0; x < height; x++) { |
383 | dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); |
384 | } |
385 | width--; |
386 | } |
387 | } |
388 | if (sao_eo_class != SAO_EO_HORIZ) { |
389 | if (borders[1]) { |
390 | int offset_val = sao_offset_val[0]; |
391 | for (x = init_x; x < width; x++) |
392 | dst[x] = av_clip_pixel(src[x] + offset_val); |
393 | } |
394 | if (borders[3]) { |
395 | int offset_val = sao_offset_val[0]; |
396 | ptrdiff_t y_stride_dst = stride_dst * (height - 1); |
397 | ptrdiff_t y_stride_src = stride_src * (height - 1); |
398 | for (x = init_x; x < width; x++) |
399 | dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); |
400 | height--; |
401 | } |
402 | } |
403 | } |
404 | |
/**
 * Border handling variant that additionally copies back source samples along
 * edges flagged in vert_edge / horiz_edge / diag_edge.
 *
 * Step 1 is the same as sao_edge_restore_0(): border columns/rows flagged in
 * borders[] are rewritten with src + sao_offset_val[0]. width and height are
 * shrunk and init_x / init_y advanced as borders are consumed, and those
 * adjusted values are what step 2 uses.
 *
 * Step 2 copies src to dst unchanged:
 *   vert_edge[0] / [1]  - first / last remaining column (not for SAO_EO_VERT)
 *   horiz_edge[0] / [1] - first / last remaining row (not for SAO_EO_HORIZ)
 *   diag_edge[0..3]     - top-left, top-right, bottom-right, bottom-left
 *                         corner, each only for the matching diagonal class
 * The save_* flags keep a corner sample out of the column/row copies when
 * that corner's diag_edge flag is clear, the class is the matching diagonal
 * and neither adjacent border is set.
 *
 * NOTE(review): the statement order matters (width--, height--, init_x and
 * init_y feed the later loops); do not reorder.
 */
static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
                                     ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
                                     int *borders, int _width, int _height,
                                     int c_idx, uint8_t *vert_edge,
                                     uint8_t *horiz_edge, uint8_t *diag_edge)
{
    int x, y;
    pixel *dst = (pixel *)_dst;
    pixel *src = (pixel *)_src;
    int16_t *sao_offset_val = sao->offset_val[c_idx];
    int sao_eo_class    = sao->eo_class[c_idx];
    int init_x = 0, init_y = 0, width = _width, height = _height;

    /* strides: bytes -> samples */
    stride_dst /= sizeof(pixel);
    stride_src /= sizeof(pixel);

    if (sao_eo_class != SAO_EO_VERT) {
        if (borders[0]) {
            /* first column */
            int offset_val = sao_offset_val[0];
            for (y = 0; y < height; y++) {
                dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
            }
            init_x = 1;
        }
        if (borders[2]) {
            /* last column; x is used as the row counter here */
            int offset_val = sao_offset_val[0];
            int offset     = width - 1;
            for (x = 0; x < height; x++) {
                dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
            }
            width--;
        }
    }
    if (sao_eo_class != SAO_EO_HORIZ) {
        if (borders[1]) {
            /* first row, minus the columns already handled above */
            int offset_val = sao_offset_val[0];
            for (x = init_x; x < width; x++)
                dst[x] = av_clip_pixel(src[x] + offset_val);
            init_y = 1;
        }
        if (borders[3]) {
            /* last row, minus the columns already handled above */
            int offset_val   = sao_offset_val[0];
            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
            ptrdiff_t y_stride_src = stride_src * (height - 1);
            for (x = init_x; x < width; x++)
                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
            height--;
        }
    }

    {
        /* 1 when the corresponding corner must be left out of the copies below */
        int save_upper_left  = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
        int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D  && !borders[1] && !borders[2];
        int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
        int save_lower_left  = !diag_edge[3] && sao_eo_class == SAO_EO_45D  && !borders[0] && !borders[3];

        // Restore pixels that can't be modified
        if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
            for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
                dst[y*stride_dst] = src[y*stride_src];
        }
        if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
            for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
                dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
        }

        if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
            for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
                dst[x] = src[x];
        }
        if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
            for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
                dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
        }
        /* single corner samples, one per diagonal direction */
        if(diag_edge[0] && sao_eo_class == SAO_EO_135D)
            dst[0] = src[0];
        if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
            dst[width-1] = src[width-1];
        if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
            dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
        if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
            dst[stride_dst*(height-1)] = src[stride_src*(height-1)];

    }
}

#undef CMP
492 | |
493 | //////////////////////////////////////////////////////////////////////////////// |
494 | // |
495 | //////////////////////////////////////////////////////////////////////////////// |
496 | static void FUNC(put_hevc_pel_pixels)(int16_t *dst, |
497 | uint8_t *_src, ptrdiff_t _srcstride, |
498 | int height, intptr_t mx, intptr_t my, int width) |
499 | { |
500 | int x, y; |
501 | pixel *src = (pixel *)_src; |
502 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
503 | |
504 | for (y = 0; y < height; y++) { |
505 | for (x = 0; x < width; x++) |
506 | dst[x] = src[x] << (14 - BIT_DEPTH); |
507 | src += srcstride; |
508 | dst += MAX_PB_SIZE; |
509 | } |
510 | } |
511 | |
512 | static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
513 | int height, intptr_t mx, intptr_t my, int width) |
514 | { |
515 | int y; |
516 | pixel *src = (pixel *)_src; |
517 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
518 | pixel *dst = (pixel *)_dst; |
519 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
520 | |
521 | for (y = 0; y < height; y++) { |
522 | memcpy(dst, src, width * sizeof(pixel)); |
523 | src += srcstride; |
524 | dst += dststride; |
525 | } |
526 | } |
527 | |
528 | static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
529 | int16_t *src2, |
530 | int height, intptr_t mx, intptr_t my, int width) |
531 | { |
532 | int x, y; |
533 | pixel *src = (pixel *)_src; |
534 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
535 | pixel *dst = (pixel *)_dst; |
536 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
537 | |
538 | int shift = 14 + 1 - BIT_DEPTH; |
539 | #if BIT_DEPTH < 14 |
540 | int offset = 1 << (shift - 1); |
541 | #else |
542 | int offset = 0; |
543 | #endif |
544 | |
545 | for (y = 0; y < height; y++) { |
546 | for (x = 0; x < width; x++) |
547 | dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); |
548 | src += srcstride; |
549 | dst += dststride; |
550 | src2 += MAX_PB_SIZE; |
551 | } |
552 | } |
553 | |
554 | static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
555 | int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
556 | { |
557 | int x, y; |
558 | pixel *src = (pixel *)_src; |
559 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
560 | pixel *dst = (pixel *)_dst; |
561 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
562 | int shift = denom + 14 - BIT_DEPTH; |
563 | #if BIT_DEPTH < 14 |
564 | int offset = 1 << (shift - 1); |
565 | #else |
566 | int offset = 0; |
567 | #endif |
568 | |
569 | ox = ox * (1 << (BIT_DEPTH - 8)); |
570 | for (y = 0; y < height; y++) { |
571 | for (x = 0; x < width; x++) |
572 | dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox); |
573 | src += srcstride; |
574 | dst += dststride; |
575 | } |
576 | } |
577 | |
578 | static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
579 | int16_t *src2, |
580 | int height, int denom, int wx0, int wx1, |
581 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
582 | { |
583 | int x, y; |
584 | pixel *src = (pixel *)_src; |
585 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
586 | pixel *dst = (pixel *)_dst; |
587 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
588 | |
589 | int shift = 14 + 1 - BIT_DEPTH; |
590 | int log2Wd = denom + shift - 1; |
591 | |
592 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
593 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
594 | for (y = 0; y < height; y++) { |
595 | for (x = 0; x < width; x++) { |
596 | dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
597 | } |
598 | src += srcstride; |
599 | dst += dststride; |
600 | src2 += MAX_PB_SIZE; |
601 | } |
602 | } |
603 | |
604 | //////////////////////////////////////////////////////////////////////////////// |
605 | // |
606 | //////////////////////////////////////////////////////////////////////////////// |
/*
 * 8-tap filter sum centred on src[x]: taps cover src[x - 3 * stride] up to
 * src[x + 4 * stride].
 *
 * Besides its arguments the macro reads two names from the enclosing scope:
 * "x" (current position) and "filter" (the eight coefficients). Callers must
 * keep locals with exactly those names.
 */
#define QPEL_FILTER(src, stride)                                               \
    (filter[0] * src[x - 3 * stride] +                                         \
     filter[1] * src[x - 2 * stride] +                                         \
     filter[2] * src[x -     stride] +                                         \
     filter[3] * src[x             ] +                                         \
     filter[4] * src[x +     stride] +                                         \
     filter[5] * src[x + 2 * stride] +                                         \
     filter[6] * src[x + 3 * stride] +                                         \
     filter[7] * src[x + 4 * stride])
616 | |
617 | static void FUNC(put_hevc_qpel_h)(int16_t *dst, |
618 | uint8_t *_src, ptrdiff_t _srcstride, |
619 | int height, intptr_t mx, intptr_t my, int width) |
620 | { |
621 | int x, y; |
622 | pixel *src = (pixel*)_src; |
623 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
624 | const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
625 | for (y = 0; y < height; y++) { |
626 | for (x = 0; x < width; x++) |
627 | dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
628 | src += srcstride; |
629 | dst += MAX_PB_SIZE; |
630 | } |
631 | } |
632 | |
633 | static void FUNC(put_hevc_qpel_v)(int16_t *dst, |
634 | uint8_t *_src, ptrdiff_t _srcstride, |
635 | int height, intptr_t mx, intptr_t my, int width) |
636 | { |
637 | int x, y; |
638 | pixel *src = (pixel*)_src; |
639 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
640 | const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
641 | for (y = 0; y < height; y++) { |
642 | for (x = 0; x < width; x++) |
643 | dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); |
644 | src += srcstride; |
645 | dst += MAX_PB_SIZE; |
646 | } |
647 | } |
648 | |
649 | static void FUNC(put_hevc_qpel_hv)(int16_t *dst, |
650 | uint8_t *_src, |
651 | ptrdiff_t _srcstride, |
652 | int height, intptr_t mx, |
653 | intptr_t my, int width) |
654 | { |
655 | int x, y; |
656 | const int8_t *filter; |
657 | pixel *src = (pixel*)_src; |
658 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
659 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
660 | int16_t *tmp = tmp_array; |
661 | |
662 | src -= QPEL_EXTRA_BEFORE * srcstride; |
663 | filter = ff_hevc_qpel_filters[mx - 1]; |
664 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
665 | for (x = 0; x < width; x++) |
666 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
667 | src += srcstride; |
668 | tmp += MAX_PB_SIZE; |
669 | } |
670 | |
671 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
672 | filter = ff_hevc_qpel_filters[my - 1]; |
673 | for (y = 0; y < height; y++) { |
674 | for (x = 0; x < width; x++) |
675 | dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; |
676 | tmp += MAX_PB_SIZE; |
677 | dst += MAX_PB_SIZE; |
678 | } |
679 | } |
680 | |
681 | static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, |
682 | uint8_t *_src, ptrdiff_t _srcstride, |
683 | int height, intptr_t mx, intptr_t my, int width) |
684 | { |
685 | int x, y; |
686 | pixel *src = (pixel*)_src; |
687 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
688 | pixel *dst = (pixel *)_dst; |
689 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
690 | const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
691 | int shift = 14 - BIT_DEPTH; |
692 | |
693 | #if BIT_DEPTH < 14 |
694 | int offset = 1 << (shift - 1); |
695 | #else |
696 | int offset = 0; |
697 | #endif |
698 | |
699 | for (y = 0; y < height; y++) { |
700 | for (x = 0; x < width; x++) |
701 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); |
702 | src += srcstride; |
703 | dst += dststride; |
704 | } |
705 | } |
706 | |
707 | static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
708 | int16_t *src2, |
709 | int height, intptr_t mx, intptr_t my, int width) |
710 | { |
711 | int x, y; |
712 | pixel *src = (pixel*)_src; |
713 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
714 | pixel *dst = (pixel *)_dst; |
715 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
716 | |
717 | const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
718 | |
719 | int shift = 14 + 1 - BIT_DEPTH; |
720 | #if BIT_DEPTH < 14 |
721 | int offset = 1 << (shift - 1); |
722 | #else |
723 | int offset = 0; |
724 | #endif |
725 | |
726 | for (y = 0; y < height; y++) { |
727 | for (x = 0; x < width; x++) |
728 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
729 | src += srcstride; |
730 | dst += dststride; |
731 | src2 += MAX_PB_SIZE; |
732 | } |
733 | } |
734 | |
735 | static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, |
736 | uint8_t *_src, ptrdiff_t _srcstride, |
737 | int height, intptr_t mx, intptr_t my, int width) |
738 | { |
739 | int x, y; |
740 | pixel *src = (pixel*)_src; |
741 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
742 | pixel *dst = (pixel *)_dst; |
743 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
744 | const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
745 | int shift = 14 - BIT_DEPTH; |
746 | |
747 | #if BIT_DEPTH < 14 |
748 | int offset = 1 << (shift - 1); |
749 | #else |
750 | int offset = 0; |
751 | #endif |
752 | |
753 | for (y = 0; y < height; y++) { |
754 | for (x = 0; x < width; x++) |
755 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); |
756 | src += srcstride; |
757 | dst += dststride; |
758 | } |
759 | } |
760 | |
761 | |
762 | static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
763 | int16_t *src2, |
764 | int height, intptr_t mx, intptr_t my, int width) |
765 | { |
766 | int x, y; |
767 | pixel *src = (pixel*)_src; |
768 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
769 | pixel *dst = (pixel *)_dst; |
770 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
771 | |
772 | const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
773 | |
774 | int shift = 14 + 1 - BIT_DEPTH; |
775 | #if BIT_DEPTH < 14 |
776 | int offset = 1 << (shift - 1); |
777 | #else |
778 | int offset = 0; |
779 | #endif |
780 | |
781 | for (y = 0; y < height; y++) { |
782 | for (x = 0; x < width; x++) |
783 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
784 | src += srcstride; |
785 | dst += dststride; |
786 | src2 += MAX_PB_SIZE; |
787 | } |
788 | } |
789 | |
790 | static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, |
791 | uint8_t *_src, ptrdiff_t _srcstride, |
792 | int height, intptr_t mx, intptr_t my, int width) |
793 | { |
794 | int x, y; |
795 | const int8_t *filter; |
796 | pixel *src = (pixel*)_src; |
797 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
798 | pixel *dst = (pixel *)_dst; |
799 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
800 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
801 | int16_t *tmp = tmp_array; |
802 | int shift = 14 - BIT_DEPTH; |
803 | |
804 | #if BIT_DEPTH < 14 |
805 | int offset = 1 << (shift - 1); |
806 | #else |
807 | int offset = 0; |
808 | #endif |
809 | |
810 | src -= QPEL_EXTRA_BEFORE * srcstride; |
811 | filter = ff_hevc_qpel_filters[mx - 1]; |
812 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
813 | for (x = 0; x < width; x++) |
814 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
815 | src += srcstride; |
816 | tmp += MAX_PB_SIZE; |
817 | } |
818 | |
819 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
820 | filter = ff_hevc_qpel_filters[my - 1]; |
821 | |
822 | for (y = 0; y < height; y++) { |
823 | for (x = 0; x < width; x++) |
824 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); |
825 | tmp += MAX_PB_SIZE; |
826 | dst += dststride; |
827 | } |
828 | } |
829 | |
830 | static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
831 | int16_t *src2, |
832 | int height, intptr_t mx, intptr_t my, int width) |
833 | { |
834 | int x, y; |
835 | const int8_t *filter; |
836 | pixel *src = (pixel*)_src; |
837 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
838 | pixel *dst = (pixel *)_dst; |
839 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
840 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
841 | int16_t *tmp = tmp_array; |
842 | int shift = 14 + 1 - BIT_DEPTH; |
843 | #if BIT_DEPTH < 14 |
844 | int offset = 1 << (shift - 1); |
845 | #else |
846 | int offset = 0; |
847 | #endif |
848 | |
849 | src -= QPEL_EXTRA_BEFORE * srcstride; |
850 | filter = ff_hevc_qpel_filters[mx - 1]; |
851 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
852 | for (x = 0; x < width; x++) |
853 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
854 | src += srcstride; |
855 | tmp += MAX_PB_SIZE; |
856 | } |
857 | |
858 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
859 | filter = ff_hevc_qpel_filters[my - 1]; |
860 | |
861 | for (y = 0; y < height; y++) { |
862 | for (x = 0; x < width; x++) |
863 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); |
864 | tmp += MAX_PB_SIZE; |
865 | dst += dststride; |
866 | src2 += MAX_PB_SIZE; |
867 | } |
868 | } |
869 | |
870 | static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, |
871 | uint8_t *_src, ptrdiff_t _srcstride, |
872 | int height, int denom, int wx, int ox, |
873 | intptr_t mx, intptr_t my, int width) |
874 | { |
875 | int x, y; |
876 | pixel *src = (pixel*)_src; |
877 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
878 | pixel *dst = (pixel *)_dst; |
879 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
880 | const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
881 | int shift = denom + 14 - BIT_DEPTH; |
882 | #if BIT_DEPTH < 14 |
883 | int offset = 1 << (shift - 1); |
884 | #else |
885 | int offset = 0; |
886 | #endif |
887 | |
888 | ox = ox * (1 << (BIT_DEPTH - 8)); |
889 | for (y = 0; y < height; y++) { |
890 | for (x = 0; x < width; x++) |
891 | dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
892 | src += srcstride; |
893 | dst += dststride; |
894 | } |
895 | } |
896 | |
897 | static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
898 | int16_t *src2, |
899 | int height, int denom, int wx0, int wx1, |
900 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
901 | { |
902 | int x, y; |
903 | pixel *src = (pixel*)_src; |
904 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
905 | pixel *dst = (pixel *)_dst; |
906 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
907 | |
908 | const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
909 | |
910 | int shift = 14 + 1 - BIT_DEPTH; |
911 | int log2Wd = denom + shift - 1; |
912 | |
913 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
914 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
915 | for (y = 0; y < height; y++) { |
916 | for (x = 0; x < width; x++) |
917 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
918 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
919 | src += srcstride; |
920 | dst += dststride; |
921 | src2 += MAX_PB_SIZE; |
922 | } |
923 | } |
924 | |
925 | static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, |
926 | uint8_t *_src, ptrdiff_t _srcstride, |
927 | int height, int denom, int wx, int ox, |
928 | intptr_t mx, intptr_t my, int width) |
929 | { |
930 | int x, y; |
931 | pixel *src = (pixel*)_src; |
932 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
933 | pixel *dst = (pixel *)_dst; |
934 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
935 | const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
936 | int shift = denom + 14 - BIT_DEPTH; |
937 | #if BIT_DEPTH < 14 |
938 | int offset = 1 << (shift - 1); |
939 | #else |
940 | int offset = 0; |
941 | #endif |
942 | |
943 | ox = ox * (1 << (BIT_DEPTH - 8)); |
944 | for (y = 0; y < height; y++) { |
945 | for (x = 0; x < width; x++) |
946 | dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
947 | src += srcstride; |
948 | dst += dststride; |
949 | } |
950 | } |
951 | |
952 | static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
953 | int16_t *src2, |
954 | int height, int denom, int wx0, int wx1, |
955 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
956 | { |
957 | int x, y; |
958 | pixel *src = (pixel*)_src; |
959 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
960 | pixel *dst = (pixel *)_dst; |
961 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
962 | |
963 | const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
964 | |
965 | int shift = 14 + 1 - BIT_DEPTH; |
966 | int log2Wd = denom + shift - 1; |
967 | |
968 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
969 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
970 | for (y = 0; y < height; y++) { |
971 | for (x = 0; x < width; x++) |
972 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
973 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
974 | src += srcstride; |
975 | dst += dststride; |
976 | src2 += MAX_PB_SIZE; |
977 | } |
978 | } |
979 | |
980 | static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, |
981 | uint8_t *_src, ptrdiff_t _srcstride, |
982 | int height, int denom, int wx, int ox, |
983 | intptr_t mx, intptr_t my, int width) |
984 | { |
985 | int x, y; |
986 | const int8_t *filter; |
987 | pixel *src = (pixel*)_src; |
988 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
989 | pixel *dst = (pixel *)_dst; |
990 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
991 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
992 | int16_t *tmp = tmp_array; |
993 | int shift = denom + 14 - BIT_DEPTH; |
994 | #if BIT_DEPTH < 14 |
995 | int offset = 1 << (shift - 1); |
996 | #else |
997 | int offset = 0; |
998 | #endif |
999 | |
1000 | src -= QPEL_EXTRA_BEFORE * srcstride; |
1001 | filter = ff_hevc_qpel_filters[mx - 1]; |
1002 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
1003 | for (x = 0; x < width; x++) |
1004 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1005 | src += srcstride; |
1006 | tmp += MAX_PB_SIZE; |
1007 | } |
1008 | |
1009 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1010 | filter = ff_hevc_qpel_filters[my - 1]; |
1011 | |
1012 | ox = ox * (1 << (BIT_DEPTH - 8)); |
1013 | for (y = 0; y < height; y++) { |
1014 | for (x = 0; x < width; x++) |
1015 | dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
1016 | tmp += MAX_PB_SIZE; |
1017 | dst += dststride; |
1018 | } |
1019 | } |
1020 | |
1021 | static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1022 | int16_t *src2, |
1023 | int height, int denom, int wx0, int wx1, |
1024 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
1025 | { |
1026 | int x, y; |
1027 | const int8_t *filter; |
1028 | pixel *src = (pixel*)_src; |
1029 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1030 | pixel *dst = (pixel *)_dst; |
1031 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1032 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
1033 | int16_t *tmp = tmp_array; |
1034 | int shift = 14 + 1 - BIT_DEPTH; |
1035 | int log2Wd = denom + shift - 1; |
1036 | |
1037 | src -= QPEL_EXTRA_BEFORE * srcstride; |
1038 | filter = ff_hevc_qpel_filters[mx - 1]; |
1039 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
1040 | for (x = 0; x < width; x++) |
1041 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1042 | src += srcstride; |
1043 | tmp += MAX_PB_SIZE; |
1044 | } |
1045 | |
1046 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1047 | filter = ff_hevc_qpel_filters[my - 1]; |
1048 | |
1049 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
1050 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
1051 | for (y = 0; y < height; y++) { |
1052 | for (x = 0; x < width; x++) |
1053 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + |
1054 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
1055 | tmp += MAX_PB_SIZE; |
1056 | dst += dststride; |
1057 | src2 += MAX_PB_SIZE; |
1058 | } |
1059 | } |
1060 | |
1061 | //////////////////////////////////////////////////////////////////////////////// |
1062 | // |
1063 | //////////////////////////////////////////////////////////////////////////////// |
/*
 * 4-tap filter centred on src[x]: the coefficients filter[0..3] are applied
 * to the samples at offsets -stride, 0, +stride and +2 * stride.
 * Relies on `filter` (coefficient array) and `x` (sample index) being in
 * scope where the macro is used. Both arguments are parenthesized so that
 * expression arguments (e.g. `a + b` as stride) expand as intended.
 */
#define EPEL_FILTER(src, stride)                \
    (filter[0] * (src)[x -     (stride)] +      \
     filter[1] * (src)[x]                +      \
     filter[2] * (src)[x +     (stride)] +      \
     filter[3] * (src)[x + 2 * (stride)])
1069 | |
1070 | static void FUNC(put_hevc_epel_h)(int16_t *dst, |
1071 | uint8_t *_src, ptrdiff_t _srcstride, |
1072 | int height, intptr_t mx, intptr_t my, int width) |
1073 | { |
1074 | int x, y; |
1075 | pixel *src = (pixel *)_src; |
1076 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1077 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1078 | for (y = 0; y < height; y++) { |
1079 | for (x = 0; x < width; x++) |
1080 | dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1081 | src += srcstride; |
1082 | dst += MAX_PB_SIZE; |
1083 | } |
1084 | } |
1085 | |
1086 | static void FUNC(put_hevc_epel_v)(int16_t *dst, |
1087 | uint8_t *_src, ptrdiff_t _srcstride, |
1088 | int height, intptr_t mx, intptr_t my, int width) |
1089 | { |
1090 | int x, y; |
1091 | pixel *src = (pixel *)_src; |
1092 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1093 | const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
1094 | |
1095 | for (y = 0; y < height; y++) { |
1096 | for (x = 0; x < width; x++) |
1097 | dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); |
1098 | src += srcstride; |
1099 | dst += MAX_PB_SIZE; |
1100 | } |
1101 | } |
1102 | |
1103 | static void FUNC(put_hevc_epel_hv)(int16_t *dst, |
1104 | uint8_t *_src, ptrdiff_t _srcstride, |
1105 | int height, intptr_t mx, intptr_t my, int width) |
1106 | { |
1107 | int x, y; |
1108 | pixel *src = (pixel *)_src; |
1109 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1110 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1111 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
1112 | int16_t *tmp = tmp_array; |
1113 | |
1114 | src -= EPEL_EXTRA_BEFORE * srcstride; |
1115 | |
1116 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
1117 | for (x = 0; x < width; x++) |
1118 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1119 | src += srcstride; |
1120 | tmp += MAX_PB_SIZE; |
1121 | } |
1122 | |
1123 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1124 | filter = ff_hevc_epel_filters[my - 1]; |
1125 | |
1126 | for (y = 0; y < height; y++) { |
1127 | for (x = 0; x < width; x++) |
1128 | dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; |
1129 | tmp += MAX_PB_SIZE; |
1130 | dst += MAX_PB_SIZE; |
1131 | } |
1132 | } |
1133 | |
1134 | static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1135 | int height, intptr_t mx, intptr_t my, int width) |
1136 | { |
1137 | int x, y; |
1138 | pixel *src = (pixel *)_src; |
1139 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1140 | pixel *dst = (pixel *)_dst; |
1141 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1142 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1143 | int shift = 14 - BIT_DEPTH; |
1144 | #if BIT_DEPTH < 14 |
1145 | int offset = 1 << (shift - 1); |
1146 | #else |
1147 | int offset = 0; |
1148 | #endif |
1149 | |
1150 | for (y = 0; y < height; y++) { |
1151 | for (x = 0; x < width; x++) |
1152 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); |
1153 | src += srcstride; |
1154 | dst += dststride; |
1155 | } |
1156 | } |
1157 | |
1158 | static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1159 | int16_t *src2, |
1160 | int height, intptr_t mx, intptr_t my, int width) |
1161 | { |
1162 | int x, y; |
1163 | pixel *src = (pixel *)_src; |
1164 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1165 | pixel *dst = (pixel *)_dst; |
1166 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1167 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1168 | int shift = 14 + 1 - BIT_DEPTH; |
1169 | #if BIT_DEPTH < 14 |
1170 | int offset = 1 << (shift - 1); |
1171 | #else |
1172 | int offset = 0; |
1173 | #endif |
1174 | |
1175 | for (y = 0; y < height; y++) { |
1176 | for (x = 0; x < width; x++) { |
1177 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
1178 | } |
1179 | dst += dststride; |
1180 | src += srcstride; |
1181 | src2 += MAX_PB_SIZE; |
1182 | } |
1183 | } |
1184 | |
1185 | static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1186 | int height, intptr_t mx, intptr_t my, int width) |
1187 | { |
1188 | int x, y; |
1189 | pixel *src = (pixel *)_src; |
1190 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1191 | pixel *dst = (pixel *)_dst; |
1192 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1193 | const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
1194 | int shift = 14 - BIT_DEPTH; |
1195 | #if BIT_DEPTH < 14 |
1196 | int offset = 1 << (shift - 1); |
1197 | #else |
1198 | int offset = 0; |
1199 | #endif |
1200 | |
1201 | for (y = 0; y < height; y++) { |
1202 | for (x = 0; x < width; x++) |
1203 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); |
1204 | src += srcstride; |
1205 | dst += dststride; |
1206 | } |
1207 | } |
1208 | |
1209 | static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1210 | int16_t *src2, |
1211 | int height, intptr_t mx, intptr_t my, int width) |
1212 | { |
1213 | int x, y; |
1214 | pixel *src = (pixel *)_src; |
1215 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1216 | const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
1217 | pixel *dst = (pixel *)_dst; |
1218 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1219 | int shift = 14 + 1 - BIT_DEPTH; |
1220 | #if BIT_DEPTH < 14 |
1221 | int offset = 1 << (shift - 1); |
1222 | #else |
1223 | int offset = 0; |
1224 | #endif |
1225 | |
1226 | for (y = 0; y < height; y++) { |
1227 | for (x = 0; x < width; x++) |
1228 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
1229 | dst += dststride; |
1230 | src += srcstride; |
1231 | src2 += MAX_PB_SIZE; |
1232 | } |
1233 | } |
1234 | |
1235 | static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1236 | int height, intptr_t mx, intptr_t my, int width) |
1237 | { |
1238 | int x, y; |
1239 | pixel *src = (pixel *)_src; |
1240 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1241 | pixel *dst = (pixel *)_dst; |
1242 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1243 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1244 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
1245 | int16_t *tmp = tmp_array; |
1246 | int shift = 14 - BIT_DEPTH; |
1247 | #if BIT_DEPTH < 14 |
1248 | int offset = 1 << (shift - 1); |
1249 | #else |
1250 | int offset = 0; |
1251 | #endif |
1252 | |
1253 | src -= EPEL_EXTRA_BEFORE * srcstride; |
1254 | |
1255 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
1256 | for (x = 0; x < width; x++) |
1257 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1258 | src += srcstride; |
1259 | tmp += MAX_PB_SIZE; |
1260 | } |
1261 | |
1262 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1263 | filter = ff_hevc_epel_filters[my - 1]; |
1264 | |
1265 | for (y = 0; y < height; y++) { |
1266 | for (x = 0; x < width; x++) |
1267 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); |
1268 | tmp += MAX_PB_SIZE; |
1269 | dst += dststride; |
1270 | } |
1271 | } |
1272 | |
1273 | static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1274 | int16_t *src2, |
1275 | int height, intptr_t mx, intptr_t my, int width) |
1276 | { |
1277 | int x, y; |
1278 | pixel *src = (pixel *)_src; |
1279 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1280 | pixel *dst = (pixel *)_dst; |
1281 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1282 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1283 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
1284 | int16_t *tmp = tmp_array; |
1285 | int shift = 14 + 1 - BIT_DEPTH; |
1286 | #if BIT_DEPTH < 14 |
1287 | int offset = 1 << (shift - 1); |
1288 | #else |
1289 | int offset = 0; |
1290 | #endif |
1291 | |
1292 | src -= EPEL_EXTRA_BEFORE * srcstride; |
1293 | |
1294 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
1295 | for (x = 0; x < width; x++) |
1296 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1297 | src += srcstride; |
1298 | tmp += MAX_PB_SIZE; |
1299 | } |
1300 | |
1301 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1302 | filter = ff_hevc_epel_filters[my - 1]; |
1303 | |
1304 | for (y = 0; y < height; y++) { |
1305 | for (x = 0; x < width; x++) |
1306 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); |
1307 | tmp += MAX_PB_SIZE; |
1308 | dst += dststride; |
1309 | src2 += MAX_PB_SIZE; |
1310 | } |
1311 | } |
1312 | |
1313 | static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1314 | int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
1315 | { |
1316 | int x, y; |
1317 | pixel *src = (pixel *)_src; |
1318 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1319 | pixel *dst = (pixel *)_dst; |
1320 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1321 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1322 | int shift = denom + 14 - BIT_DEPTH; |
1323 | #if BIT_DEPTH < 14 |
1324 | int offset = 1 << (shift - 1); |
1325 | #else |
1326 | int offset = 0; |
1327 | #endif |
1328 | |
1329 | ox = ox * (1 << (BIT_DEPTH - 8)); |
1330 | for (y = 0; y < height; y++) { |
1331 | for (x = 0; x < width; x++) { |
1332 | dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
1333 | } |
1334 | dst += dststride; |
1335 | src += srcstride; |
1336 | } |
1337 | } |
1338 | |
1339 | static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1340 | int16_t *src2, |
1341 | int height, int denom, int wx0, int wx1, |
1342 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
1343 | { |
1344 | int x, y; |
1345 | pixel *src = (pixel *)_src; |
1346 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1347 | pixel *dst = (pixel *)_dst; |
1348 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1349 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1350 | int shift = 14 + 1 - BIT_DEPTH; |
1351 | int log2Wd = denom + shift - 1; |
1352 | |
1353 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
1354 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
1355 | for (y = 0; y < height; y++) { |
1356 | for (x = 0; x < width; x++) |
1357 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
1358 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
1359 | src += srcstride; |
1360 | dst += dststride; |
1361 | src2 += MAX_PB_SIZE; |
1362 | } |
1363 | } |
1364 | |
1365 | static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1366 | int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
1367 | { |
1368 | int x, y; |
1369 | pixel *src = (pixel *)_src; |
1370 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1371 | pixel *dst = (pixel *)_dst; |
1372 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1373 | const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
1374 | int shift = denom + 14 - BIT_DEPTH; |
1375 | #if BIT_DEPTH < 14 |
1376 | int offset = 1 << (shift - 1); |
1377 | #else |
1378 | int offset = 0; |
1379 | #endif |
1380 | |
1381 | ox = ox * (1 << (BIT_DEPTH - 8)); |
1382 | for (y = 0; y < height; y++) { |
1383 | for (x = 0; x < width; x++) { |
1384 | dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
1385 | } |
1386 | dst += dststride; |
1387 | src += srcstride; |
1388 | } |
1389 | } |
1390 | |
1391 | static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1392 | int16_t *src2, |
1393 | int height, int denom, int wx0, int wx1, |
1394 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
1395 | { |
1396 | int x, y; |
1397 | pixel *src = (pixel *)_src; |
1398 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1399 | const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
1400 | pixel *dst = (pixel *)_dst; |
1401 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1402 | int shift = 14 + 1 - BIT_DEPTH; |
1403 | int log2Wd = denom + shift - 1; |
1404 | |
1405 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
1406 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
1407 | for (y = 0; y < height; y++) { |
1408 | for (x = 0; x < width; x++) |
1409 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
1410 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
1411 | src += srcstride; |
1412 | dst += dststride; |
1413 | src2 += MAX_PB_SIZE; |
1414 | } |
1415 | } |
1416 | |
1417 | static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1418 | int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
1419 | { |
1420 | int x, y; |
1421 | pixel *src = (pixel *)_src; |
1422 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1423 | pixel *dst = (pixel *)_dst; |
1424 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1425 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1426 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
1427 | int16_t *tmp = tmp_array; |
1428 | int shift = denom + 14 - BIT_DEPTH; |
1429 | #if BIT_DEPTH < 14 |
1430 | int offset = 1 << (shift - 1); |
1431 | #else |
1432 | int offset = 0; |
1433 | #endif |
1434 | |
1435 | src -= EPEL_EXTRA_BEFORE * srcstride; |
1436 | |
1437 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
1438 | for (x = 0; x < width; x++) |
1439 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1440 | src += srcstride; |
1441 | tmp += MAX_PB_SIZE; |
1442 | } |
1443 | |
1444 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1445 | filter = ff_hevc_epel_filters[my - 1]; |
1446 | |
1447 | ox = ox * (1 << (BIT_DEPTH - 8)); |
1448 | for (y = 0; y < height; y++) { |
1449 | for (x = 0; x < width; x++) |
1450 | dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
1451 | tmp += MAX_PB_SIZE; |
1452 | dst += dststride; |
1453 | } |
1454 | } |
1455 | |
1456 | static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
1457 | int16_t *src2, |
1458 | int height, int denom, int wx0, int wx1, |
1459 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
1460 | { |
1461 | int x, y; |
1462 | pixel *src = (pixel *)_src; |
1463 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
1464 | pixel *dst = (pixel *)_dst; |
1465 | ptrdiff_t dststride = _dststride / sizeof(pixel); |
1466 | const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
1467 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
1468 | int16_t *tmp = tmp_array; |
1469 | int shift = 14 + 1 - BIT_DEPTH; |
1470 | int log2Wd = denom + shift - 1; |
1471 | |
1472 | src -= EPEL_EXTRA_BEFORE * srcstride; |
1473 | |
1474 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
1475 | for (x = 0; x < width; x++) |
1476 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
1477 | src += srcstride; |
1478 | tmp += MAX_PB_SIZE; |
1479 | } |
1480 | |
1481 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
1482 | filter = ff_hevc_epel_filters[my - 1]; |
1483 | |
1484 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
1485 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
1486 | for (y = 0; y < height; y++) { |
1487 | for (x = 0; x < width; x++) |
1488 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + |
1489 | ((ox0 + ox1 + 1) << log2Wd)) >> (log2Wd + 1)); |
1490 | tmp += MAX_PB_SIZE; |
1491 | dst += dststride; |
1492 | src2 += MAX_PB_SIZE; |
1493 | } |
1494 | } |
1495 | |
/*
 * Sample accessors for the deblocking filters. They expand to lvalues and
 * rely on `pix`, `xstride` and `ystride` being in scope where they are used.
 * Pn/Qn are the samples at -(n + 1) * xstride and +n * xstride from pix.
 */

// line zero of the current segment
#define P3 (pix[-4 * xstride])
#define P2 (pix[-3 * xstride])
#define P1 (pix[-2 * xstride])
#define P0 (pix[-1 * xstride])
#define Q0 (pix[0 * xstride])
#define Q1 (pix[1 * xstride])
#define Q2 (pix[2 * xstride])
#define Q3 (pix[3 * xstride])

// line three (3 * ystride further on); used only for the deblocking decision
#define TP3 (pix[-4 * xstride + 3 * ystride])
#define TP2 (pix[-3 * xstride + 3 * ystride])
#define TP1 (pix[-2 * xstride + 3 * ystride])
#define TP0 (pix[-1 * xstride + 3 * ystride])
#define TQ0 (pix[0 * xstride + 3 * ystride])
#define TQ1 (pix[1 * xstride + 3 * ystride])
#define TQ2 (pix[2 * xstride + 3 * ystride])
#define TQ3 (pix[3 * xstride + 3 * ystride])
1515 | |
1516 | static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, |
1517 | ptrdiff_t _xstride, ptrdiff_t _ystride, |
1518 | int beta, int *_tc, |
1519 | uint8_t *_no_p, uint8_t *_no_q) |
1520 | { |
1521 | int d, j; |
1522 | pixel *pix = (pixel *)_pix; |
1523 | ptrdiff_t xstride = _xstride / sizeof(pixel); |
1524 | ptrdiff_t ystride = _ystride / sizeof(pixel); |
1525 | |
1526 | beta <<= BIT_DEPTH - 8; |
1527 | |
1528 | for (j = 0; j < 2; j++) { |
1529 | const int dp0 = abs(P2 - 2 * P1 + P0); |
1530 | const int dq0 = abs(Q2 - 2 * Q1 + Q0); |
1531 | const int dp3 = abs(TP2 - 2 * TP1 + TP0); |
1532 | const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0); |
1533 | const int d0 = dp0 + dq0; |
1534 | const int d3 = dp3 + dq3; |
1535 | const int tc = _tc[j] << (BIT_DEPTH - 8); |
1536 | const int no_p = _no_p[j]; |
1537 | const int no_q = _no_q[j]; |
1538 | |
1539 | if (d0 + d3 >= beta) { |
1540 | pix += 4 * ystride; |
1541 | continue; |
1542 | } else { |
1543 | const int beta_3 = beta >> 3; |
1544 | const int beta_2 = beta >> 2; |
1545 | const int tc25 = ((tc * 5 + 1) >> 1); |
1546 | |
1547 | if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && |
1548 | abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && |
1549 | (d0 << 1) < beta_2 && (d3 << 1) < beta_2) { |
1550 | // strong filtering |
1551 | const int tc2 = tc << 1; |
1552 | for (d = 0; d < 4; d++) { |
1553 | const int p3 = P3; |
1554 | const int p2 = P2; |
1555 | const int p1 = P1; |
1556 | const int p0 = P0; |
1557 | const int q0 = Q0; |
1558 | const int q1 = Q1; |
1559 | const int q2 = Q2; |
1560 | const int q3 = Q3; |
1561 | if (!no_p) { |
1562 | P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2); |
1563 | P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2); |
1564 | P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2); |
1565 | } |
1566 | if (!no_q) { |
1567 | Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2); |
1568 | Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2); |
1569 | Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2); |
1570 | } |
1571 | pix += ystride; |
1572 | } |
1573 | } else { // normal filtering |
1574 | int nd_p = 1; |
1575 | int nd_q = 1; |
1576 | const int tc_2 = tc >> 1; |
1577 | if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3)) |
1578 | nd_p = 2; |
1579 | if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3)) |
1580 | nd_q = 2; |
1581 | |
1582 | for (d = 0; d < 4; d++) { |
1583 | const int p2 = P2; |
1584 | const int p1 = P1; |
1585 | const int p0 = P0; |
1586 | const int q0 = Q0; |
1587 | const int q1 = Q1; |
1588 | const int q2 = Q2; |
1589 | int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4; |
1590 | if (abs(delta0) < 10 * tc) { |
1591 | delta0 = av_clip(delta0, -tc, tc); |
1592 | if (!no_p) |
1593 | P0 = av_clip_pixel(p0 + delta0); |
1594 | if (!no_q) |
1595 | Q0 = av_clip_pixel(q0 - delta0); |
1596 | if (!no_p && nd_p > 1) { |
1597 | const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2); |
1598 | P1 = av_clip_pixel(p1 + deltap1); |
1599 | } |
1600 | if (!no_q && nd_q > 1) { |
1601 | const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2); |
1602 | Q1 = av_clip_pixel(q1 + deltaq1); |
1603 | } |
1604 | } |
1605 | pix += ystride; |
1606 | } |
1607 | } |
1608 | } |
1609 | } |
1610 | } |
1611 | |
1612 | static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride, |
1613 | ptrdiff_t _ystride, int *_tc, |
1614 | uint8_t *_no_p, uint8_t *_no_q) |
1615 | { |
1616 | int d, j, no_p, no_q; |
1617 | pixel *pix = (pixel *)_pix; |
1618 | ptrdiff_t xstride = _xstride / sizeof(pixel); |
1619 | ptrdiff_t ystride = _ystride / sizeof(pixel); |
1620 | |
1621 | for (j = 0; j < 2; j++) { |
1622 | const int tc = _tc[j] << (BIT_DEPTH - 8); |
1623 | if (tc <= 0) { |
1624 | pix += 4 * ystride; |
1625 | continue; |
1626 | } |
1627 | no_p = _no_p[j]; |
1628 | no_q = _no_q[j]; |
1629 | |
1630 | for (d = 0; d < 4; d++) { |
1631 | int delta0; |
1632 | const int p1 = P1; |
1633 | const int p0 = P0; |
1634 | const int q0 = Q0; |
1635 | const int q1 = Q1; |
1636 | delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc); |
1637 | if (!no_p) |
1638 | P0 = av_clip_pixel(p0 + delta0); |
1639 | if (!no_q) |
1640 | Q0 = av_clip_pixel(q0 - delta0); |
1641 | pix += ystride; |
1642 | } |
1643 | } |
1644 | } |
1645 | |
1646 | static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, |
1647 | int32_t *tc, uint8_t *no_p, |
1648 | uint8_t *no_q) |
1649 | { |
1650 | FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); |
1651 | } |
1652 | |
1653 | static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, |
1654 | int32_t *tc, uint8_t *no_p, |
1655 | uint8_t *no_q) |
1656 | { |
1657 | FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); |
1658 | } |
1659 | |
1660 | static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, |
1661 | int beta, int32_t *tc, uint8_t *no_p, |
1662 | uint8_t *no_q) |
1663 | { |
1664 | FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), |
1665 | beta, tc, no_p, no_q); |
1666 | } |
1667 | |
1668 | static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, |
1669 | int beta, int32_t *tc, uint8_t *no_p, |
1670 | uint8_t *no_q) |
1671 | { |
1672 | FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, |
1673 | beta, tc, no_p, no_q); |
1674 | } |
1675 | |
/* The deblocking sample accessors are not needed past this point. */

/* line three (decision only) */
#undef TQ3
#undef TQ2
#undef TQ1
#undef TQ0
#undef TP0
#undef TP1
#undef TP2
#undef TP3

/* line zero */
#undef Q3
#undef Q2
#undef Q1
#undef Q0
#undef P0
#undef P1
#undef P2
#undef P3
1693 |