blob: cd1209e209dcb1c69a9c23dc55914bf641e20e71
1 | /* |
2 | * Copyright (C) 2009 David Conrad |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include "avcodec.h" |
22 | #include "diracdsp.h" |
23 | |
/*
 * 8-tap symmetric filter centred between src[0] and src[1*stride].
 *
 * Taps (-1, 3, -7, 21, 21, -7, 3, -1) are applied to
 * src[-3*stride] .. src[4*stride]; the taps sum to 32, hence the +16
 * rounding term and the >> 5 normalisation.  The result is NOT clipped.
 *
 * Both arguments are fully parenthesized so that expressions (for example
 * "2*stride" or "a+b") can be passed safely.  Both are evaluated several
 * times, so they must not have side effects.
 */
#define FILTER(src, stride)                                     \
    ((21*((src)[ 0*(stride)] + (src)[1*(stride)])               \
      -7*((src)[-1*(stride)] + (src)[2*(stride)])               \
      +3*((src)[-2*(stride)] + (src)[3*(stride)])               \
      -1*((src)[-3*(stride)] + (src)[4*(stride)]) + 16) >> 5)
29 | |
/**
 * Build three filtered planes from src, one row at a time.
 *
 *  - dstv: FILTER applied vertically to src (taps step by stride), clipped
 *          to 8 bits.  Columns -3 .. width+4 are written.
 *  - dstc: FILTER applied horizontally to the dstv row just produced.
 *  - dsth: FILTER applied horizontally to src.
 *
 * All four pointers advance by stride after every row.  The caller must
 * provide readable/writable margins matching the tap reach of FILTER
 * (3 samples before, 4 after) in both directions.
 */
static void dirac_hpel_filter(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, const uint8_t *src,
                              int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height;
         row++, src += stride, dsth += stride, dstv += stride, dstc += stride) {
        /* The vertical pass must come first and must overshoot the row on
         * both sides: the horizontal pass over dstv below reads
         * dstv[col-3] .. dstv[col+4]. */
        col = -3;
        while (col < width + 5) {
            dstv[col] = av_clip_uint8(FILTER(src + col, stride));
            col++;
        }

        /* Horizontal pass over the vertically filtered row. */
        col = 0;
        while (col < width) {
            dstc[col] = av_clip_uint8(FILTER(dstv + col, 1));
            col++;
        }

        /* Horizontal pass over the untouched source row. */
        col = 0;
        while (col < width) {
            dsth[col] = av_clip_uint8(FILTER(src + col, 1));
            col++;
        }
    }
}
51 | |
/* Store policies for the pixel kernels: overwrite, or average (rounding up)
 * with what is already in the destination. */
#define OP_PUT(dst, val) (dst) = (val)
#define OP_AVG(dst, val) (dst) = (((dst) + (val) + 1)>>1)

/*
 * Generates ff_<PFX>_dirac_pixels<WIDTH>_bilinear_c().
 *
 * src[0..3] are four source planes and src[4] points at four byte weights.
 * For each of the h rows, every one of the WIDTH output pixels is the
 * weighted sum of the four co-located source pixels, rounded with +8 and
 * shifted right by 4, then stored through OP.  dst and the four source
 * pointers all advance by stride per row.
 */
#define PIXOP_BILINEAR(PFX, OP, WIDTH)                                      \
static void ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{                                                                           \
    const uint8_t *const wt = src[4];                                       \
    const uint8_t *ref[4];                                                  \
    int col, k;                                                             \
                                                                            \
    for (k = 0; k < 4; k++)                                                 \
        ref[k] = src[k];                                                    \
                                                                            \
    for (; h != 0; h--) {                                                   \
        for (col = 0; col < WIDTH; col++) {                                 \
            const int acc = ref[0][col] * wt[0] + ref[1][col] * wt[1] +     \
                            ref[2][col] * wt[2] + ref[3][col] * wt[3] + 8;  \
            OP(dst[col], acc >> 4);                                         \
        }                                                                   \
                                                                            \
        dst += stride;                                                      \
        for (k = 0; k < 4; k++)                                             \
            ref[k] += stride;                                               \
    }                                                                       \
}

PIXOP_BILINEAR(put, OP_PUT, 8)
PIXOP_BILINEAR(put, OP_PUT, 16)
PIXOP_BILINEAR(put, OP_PUT, 32)
PIXOP_BILINEAR(avg, OP_AVG, 8)
PIXOP_BILINEAR(avg, OP_AVG, 16)
PIXOP_BILINEAR(avg, OP_AVG, 32)
84 | |
/*
 * Per-pixel weighting primitives.  They pick up block/dst/src, the weights
 * and log2_denom from the scope in which they are expanded.
 *
 *  op_scale1: block[x] = clip8((block[x]*weight + round) >> log2_denom)
 *  op_scale2: dst[x]   = clip8((src[x]*weights + dst[x]*weightd + round)
 *                              >> log2_denom)
 *
 * with round = 1 << (log2_denom - 1).
 * NOTE(review): the rounding term shifts by log2_denom - 1, so callers are
 * assumed to pass log2_denom >= 1 - confirm against the decoder.
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + (1<<(log2_denom-1))) >> log2_denom)
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + (1<<(log2_denom-1))) >> log2_denom)

/*
 * Generates weight_dirac_pixels<W>_c() and biweight_dirac_pixels<W>_c().
 *
 * weight:   scales a W x h block in place.
 * biweight: blends src into dst over a W x h block, both using stride.
 *
 * Every pixel in columns 0 .. W-1 is scaled exactly once per row.  Scaling
 * both x and x+1 while advancing x by one would feed already weighted
 * pixels through the formula a second time and would modify column W,
 * which lies outside the block.
 */
#define DIRAC_WEIGHT(W)                                                 \
static void weight_dirac_pixels ## W ## _c(uint8_t *block, int stride, int log2_denom, \
                                           int weight, int h) {         \
    int x;                                                              \
    while (h--) {                                                       \
        for (x = 0; x < W; x++)                                         \
            op_scale1(x);                                               \
        block += stride;                                                \
    }                                                                   \
}                                                                       \
static void biweight_dirac_pixels ## W ## _c(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, \
                                             int weightd, int weights, int h) { \
    int x;                                                              \
    while (h--) {                                                       \
        for (x = 0; x < W; x++)                                         \
            op_scale2(x);                                               \
        dst += stride;                                                  \
        src += stride;                                                  \
    }                                                                   \
}

DIRAC_WEIGHT(8)
DIRAC_WEIGHT(16)
DIRAC_WEIGHT(32)
116 | |
/*
 * Generates add_obmc<xblen>_c(): accumulates src * obmc_weight into the
 * 16-bit buffer dst for an xblen x yblen block.
 *
 * dst and src both advance by stride elements per row; the weight table
 * advances by a fixed 32 entries per row regardless of xblen.  The sum is
 * stored back into uint16_t, so it wraps modulo 65536.
 */
#define ADD_OBMC(xblen)                                                 \
static void add_obmc ## xblen ## _c(uint16_t *dst, const uint8_t *src, int stride, \
                                    const uint8_t *obmc_weight, int yblen) \
{                                                                       \
    int col;                                                            \
    for (; yblen != 0; yblen--) {                                       \
        for (col = 0; col < xblen; col++)                               \
            dst[col] += src[col] * obmc_weight[col];                    \
        dst         += stride;                                          \
        src         += stride;                                          \
        obmc_weight += 32;                                              \
    }                                                                   \
}

ADD_OBMC(8)
ADD_OBMC(16)
ADD_OBMC(32)
136 | |
/**
 * Convert a rectangle of signed 16-bit samples to unsigned 8-bit pixels.
 *
 * Each sample is biased by +128 and clipped to 0..255.  _src is
 * reinterpreted as int16_t and advanced by src_stride >> 1 elements per
 * row, i.e. src_stride is consumed as a byte count; dst advances by
 * dst_stride bytes.
 *
 * Columns are handled in groups of four, so when width is not a multiple
 * of four the row is processed up to the next multiple of four.
 */
static void put_signed_rect_clamped_8bit_c(uint8_t *dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
{
    const int16_t *coef = (const int16_t *)_src;
    const int coef_stride = src_stride >> 1;
    int row, col, k;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col += 4) {
            for (k = col; k < col + 4; k++)
                dst[k] = av_clip_uint8(coef[k] + 128);
        }
        dst  += dst_stride;
        coef += coef_stride;
    }
}
152 | |
/*
 * Generates put_signed_rect_clamped_<PX>bit_c(): converts a rectangle of
 * signed 32-bit samples to unsigned PX-bit pixels stored in 16-bit words.
 *
 * Each sample is biased by 1 << (PX - 1) and clipped with
 * av_clip_uintp2(..., PX).  Both strides are consumed as byte counts:
 * the 16-bit destination advances by dst_stride >> 1 elements and the
 * 32-bit source by src_stride >> 2 elements per row.
 *
 * Columns are handled in groups of four, so when width is not a multiple
 * of four the row is processed up to the next multiple of four.
 */
#define PUT_SIGNED_RECT_CLAMPED(PX)                                             \
static void put_signed_rect_clamped_ ## PX ## bit_c(uint8_t *_dst, int dst_stride, const uint8_t *_src, \
                                                    int src_stride, int width, int height) \
{                                                                               \
    uint16_t *out = (uint16_t *)_dst;                                           \
    const int32_t *coef = (const int32_t *)_src;                                \
    const int out_stride  = dst_stride >> 1;                                    \
    const int coef_stride = src_stride >> 2;                                    \
    int row, col, k;                                                            \
                                                                                \
    for (row = 0; row < height; row++) {                                        \
        for (col = 0; col < width; col += 4) {                                  \
            for (k = col; k < col + 4; k++)                                     \
                out[k] = av_clip_uintp2(coef[k] + (1 << (PX - 1)), PX);         \
        }                                                                       \
        out  += out_stride;                                                     \
        coef += coef_stride;                                                    \
    }                                                                           \
}

PUT_SIGNED_RECT_CLAMPED(10)
PUT_SIGNED_RECT_CLAMPED(12)
174 | |
/**
 * Combine a 16-bit accumulation buffer with a signed 16-bit plane into
 * 8-bit pixels.
 *
 * For every pixel: dst = clip8(((src + 32) >> 6) + idwt).
 * dst and src both advance by stride elements per row, idwt by
 * idwt_stride elements.
 *
 * Columns are handled in pairs, so an odd width is processed up to the
 * next even column.
 */
static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
                               const int16_t *idwt, int idwt_stride,
                               int width, int height)
{
    int row, col, k;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col += 2) {
            for (k = col; k < col + 2; k++) {
                const int scaled = (src[k] + 32) >> 6; /* round, drop 6 fractional bits */
                dst[k] = av_clip_uint8(scaled + idwt[k]);
            }
        }
        dst  += stride;
        src  += stride;
        idwt += idwt_stride;
    }
}
191 | |
/*
 * Generates dequant_subband_<PX>_c(): dequantises a tot_h x tot_v block of
 * PX-typed coefficients.
 *
 * For every non-zero coefficient c the output is
 *     sign(c) * ((|c| * qf + qs) >> 2)
 * and zero stays zero.
 *
 * src rows are tightly packed (tot_h coefficients each); dst rows are
 * stride bytes apart.
 *
 * The magnitude is computed in unsigned arithmetic: coefficients come from
 * the bitstream, and a signed |c| * qf + qs (or |c| for the most negative
 * value) could overflow, which is undefined behaviour.  Unsigned arithmetic
 * wraps instead and gives the same result whenever no overflow occurs.
 */
#define DEQUANT_SUBBAND(PX)                                                     \
static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride, \
                                         const int qf, const int qs, int tot_v, int tot_h) \
{                                                                               \
    int i, y;                                                                   \
    for (y = 0; y < tot_v; y++) {                                               \
        PX c, *src_r = (PX *)src, *dst_r = (PX *)dst;                           \
        for (i = 0; i < tot_h; i++) {                                           \
            c = *src_r++;                                                       \
            if (c < 0)                                                          \
                c = -((-(unsigned)c * qf + qs) >> 2);                           \
            else if (c > 0)                                                     \
                c =  (( (unsigned)c * qf + qs) >> 2);                           \
            *dst_r++ = c;                                                       \
        }                                                                       \
        src += tot_h * (ptrdiff_t)sizeof(PX); /* packed source rows */          \
        dst += stride;                                                          \
    }                                                                           \
}

DEQUANT_SUBBAND(int16_t)
DEQUANT_SUBBAND(int32_t)
212 | |
/*
 * Fills one row of c-><PFX>_dirac_pixels_tab with the four C kernels for a
 * given block width.  The row index is WIDTH >> 4, so widths 8, 16 and 32
 * land in rows 0, 1 and 2.  Expects a context pointer named "c" in scope.
 *
 * Wrapped in do { } while (0) so the four assignments behave as a single
 * statement (e.g. under an unbraced if); invoke it with a trailing ';'.
 */
#define PIXFUNC(PFX, WIDTH)                                                                                   \
    do {                                                                                                      \
        c->PFX ## _dirac_pixels_tab[(WIDTH)>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c;              \
        c->PFX ## _dirac_pixels_tab[(WIDTH)>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c;           \
        c->PFX ## _dirac_pixels_tab[(WIDTH)>>4][2] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l4_c;           \
        c->PFX ## _dirac_pixels_tab[(WIDTH)>>4][3] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c;     \
    } while (0)
218 | |
219 | av_cold void ff_diracdsp_init(DiracDSPContext *c) |
220 | { |
221 | c->dirac_hpel_filter = dirac_hpel_filter; |
222 | c->add_rect_clamped = add_rect_clamped_c; |
223 | c->put_signed_rect_clamped[0] = put_signed_rect_clamped_8bit_c; |
224 | c->put_signed_rect_clamped[1] = put_signed_rect_clamped_10bit_c; |
225 | c->put_signed_rect_clamped[2] = put_signed_rect_clamped_12bit_c; |
226 | |
227 | c->add_dirac_obmc[0] = add_obmc8_c; |
228 | c->add_dirac_obmc[1] = add_obmc16_c; |
229 | c->add_dirac_obmc[2] = add_obmc32_c; |
230 | |
231 | c->weight_dirac_pixels_tab[0] = weight_dirac_pixels8_c; |
232 | c->weight_dirac_pixels_tab[1] = weight_dirac_pixels16_c; |
233 | c->weight_dirac_pixels_tab[2] = weight_dirac_pixels32_c; |
234 | c->biweight_dirac_pixels_tab[0] = biweight_dirac_pixels8_c; |
235 | c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c; |
236 | c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c; |
237 | |
238 | c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c; |
239 | c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c; |
240 | |
241 | PIXFUNC(put, 8); |
242 | PIXFUNC(put, 16); |
243 | PIXFUNC(put, 32); |
244 | PIXFUNC(avg, 8); |
245 | PIXFUNC(avg, 16); |
246 | PIXFUNC(avg, 32); |
247 | |
248 | if (ARCH_X86) |
249 | ff_diracdsp_init_x86(c); |
250 | } |
251 |