blob: 1d05b2fe08cb869353e12778f2312140a38de2e6
1 | /* |
2 | * Simple IDCT |
3 | * |
4 | * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | /** |
24 | * @file |
25 | * simpleidct in C. |
26 | */ |
27 | |
28 | #include "libavutil/intreadwrite.h" |
29 | #include "avcodec.h" |
30 | #include "mathops.h" |
31 | #include "simple_idct.h" |
32 | |
/*
 * simple_idct_template.c is included four times below, each time with a
 * different BIT_DEPTH (and, for one inclusion, EXTRA_SHIFT) configuration.
 * The helpers called later in this file (idctRowCondDC_8,
 * idctSparseColAdd_8, idctRowCondDC_extrashift_10,
 * idctSparseCol_extrashift_10) are not defined here and presumably come
 * from these inclusions -- confirm against the template.
 */

/* 8-bit configuration. */
#define BIT_DEPTH 8
#include "simple_idct_template.c"
#undef BIT_DEPTH

/* 10-bit configuration. BIT_DEPTH is deliberately left defined ... */
#define BIT_DEPTH 10
#include "simple_idct_template.c"

/* ... so that this second inclusion sees BIT_DEPTH 10 together with
 * EXTRA_SHIFT 2. */
#define EXTRA_SHIFT 2
#include "simple_idct_template.c"

/* Drop both configuration macros before the next inclusion. */
#undef EXTRA_SHIFT
#undef BIT_DEPTH

/* 12-bit configuration. */
#define BIT_DEPTH 12
#include "simple_idct_template.c"
#undef BIT_DEPTH
49 | |
50 | /* 2x4x8 idct */ |
51 | |
/* Fixed-point constants for the 4-point column transform: CN_SHIFT
 * fractional bits, rounded to nearest.
 * 0.6532814824 ~= cos(pi/8)/sqrt(2), 0.2705980501 ~= sin(pi/8)/sqrt(2). */
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)

/**
 * 4-point column IDCT whose result overwrites four pixels.
 *
 * Reads the coefficients at col[0], col[16], col[32] and col[48] (every
 * second row of an 8-wide block) and stores the four outputs, rounded,
 * shifted right by C_SHIFT and passed through av_clip_uint8(), at
 * dest, dest + line_size, dest + 2 * line_size and dest + 3 * line_size.
 *
 * @param dest      first output pixel of the column
 * @param line_size distance in bytes between consecutive output pixels
 * @param col       first coefficient of the column
 */
static inline void idct4col_put(uint8_t *dest, ptrdiff_t line_size, const int16_t *col)
{
    const int rounding = 1 << (C_SHIFT - 1);
    const int in0 = col[8 * 0];
    const int in1 = col[8 * 2];
    const int in2 = col[8 * 4];
    const int in3 = col[8 * 6];
    /* Even part: both inputs are weighted by 0.5 in CN_SHIFT fixed point;
     * the rounding term is folded in here so it reaches every output. */
    const int even_sum  = (in0 + in2) * (1 << (CN_SHIFT - 1)) + rounding;
    const int even_diff = (in0 - in2) * (1 << (CN_SHIFT - 1)) + rounding;
    /* Odd part: rotation of (in1, in3) by the C1/C2 pair. */
    const int odd_outer = in1 * C1 + in3 * C2;
    const int odd_inner = in1 * C2 - in3 * C1;
    /* Outputs listed top to bottom. */
    const int out[4] = {
        even_sum  + odd_outer,
        even_diff + odd_inner,
        even_diff - odd_inner,
        even_sum  - odd_outer,
    };
    int r;

    for (r = 0; r < 4; r++)
        dest[r * line_size] = av_clip_uint8(out[r] >> C_SHIFT);
}
81 | |
/*
 * In-place butterfly on the element pair ptr[k] / ptr[8 + k]:
 * afterwards ptr[k] holds the sum and ptr[8 + k] the difference of the two
 * original values (computed in int, then stored back into the array).
 *
 * The macro uses a pointer named `ptr` that must be in scope at the
 * expansion site. It is wrapped in do { } while (0) so that "BF(k);" is
 * exactly one statement (safe in an unbraced if/else), and k is
 * parenthesized so that any expression may be passed as the index.
 */
#define BF(k) \
    do {\
        const int bf_first  = ptr[(k)];\
        const int bf_second = ptr[8 + (k)];\
        ptr[(k)]     = bf_first + bf_second;\
        ptr[8 + (k)] = bf_first - bf_second;\
    } while (0)
90 | |
/* only used by DV codec. The input must be interlaced. 128 is added
   to the pixels before clamping to avoid systematic error
   (a 1024*sqrt(2) offset would be needed otherwise). */
94 | /* XXX: I think a 1.0/sqrt(2) normalization should be needed to |
95 | compensate the extra butterfly stage - I don't have the full DV |
96 | specification */ |
void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *pair;
    int n, k;

    /* Butterfly stage: for each of the four pairs of adjacent rows,
     * replace the upper row by the sum and the lower row by the
     * difference of the two rows, element by element. */
    for (n = 0, pair = block; n < 4; n++, pair += 2 * 8) {
        for (k = 0; k < 8; k++) {
            const int upper = pair[k];
            const int lower = pair[8 + k];

            pair[k]     = upper + lower;
            pair[8 + k] = upper - lower;
        }
    }

    /* 8-point IDCT along each of the eight rows. */
    for (n = 0; n < 8; n++)
        idctRowCondDC_8(&block[8 * n], 0);

    /* 4-point IDCT down each column, done twice per column: the even
     * rows of the block produce output lines 0, 2, 4, 6 and the odd rows
     * produce output lines 1, 3, 5, 7 (hence the doubled stride). */
    for (n = 0; n < 8; n++) {
        idct4col_put(dest + n, 2 * line_size, block + n);
        idct4col_put(dest + line_size + n, 2 * line_size, block + 8 + n);
    }
}
127 | |
128 | /* 8x4 & 4x8 WMV2 IDCT */ |
129 | #undef CN_SHIFT |
130 | #undef C_SHIFT |
131 | #undef C_FIX |
132 | #undef C1 |
133 | #undef C2 |
134 | #define CN_SHIFT 12 |
135 | #define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5)) |
136 | #define C1 C_FIX(0.6532814824) |
137 | #define C2 C_FIX(0.2705980501) |
138 | #define C3 C_FIX(0.5) |
139 | #define C_SHIFT (4+1+12) |
140 | static inline void idct4col_add(uint8_t *dest, ptrdiff_t line_size, const int16_t *col) |
141 | { |
142 | int c0, c1, c2, c3, a0, a1, a2, a3; |
143 | |
144 | a0 = col[8*0]; |
145 | a1 = col[8*1]; |
146 | a2 = col[8*2]; |
147 | a3 = col[8*3]; |
148 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); |
149 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); |
150 | c1 = a1 * C1 + a3 * C2; |
151 | c3 = a1 * C2 - a3 * C1; |
152 | dest[0] = av_clip_uint8(dest[0] + ((c0 + c1) >> C_SHIFT)); |
153 | dest += line_size; |
154 | dest[0] = av_clip_uint8(dest[0] + ((c2 + c3) >> C_SHIFT)); |
155 | dest += line_size; |
156 | dest[0] = av_clip_uint8(dest[0] + ((c2 - c3) >> C_SHIFT)); |
157 | dest += line_size; |
158 | dest[0] = av_clip_uint8(dest[0] + ((c0 - c1) >> C_SHIFT)); |
159 | } |
160 | |
161 | #define RN_SHIFT 15 |
162 | #define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5)) |
163 | #define R1 R_FIX(0.6532814824) |
164 | #define R2 R_FIX(0.2705980501) |
165 | #define R3 R_FIX(0.5) |
166 | #define R_SHIFT 11 |
167 | static inline void idct4row(int16_t *row) |
168 | { |
169 | int c0, c1, c2, c3, a0, a1, a2, a3; |
170 | |
171 | a0 = row[0]; |
172 | a1 = row[1]; |
173 | a2 = row[2]; |
174 | a3 = row[3]; |
175 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); |
176 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); |
177 | c1 = a1 * R1 + a3 * R2; |
178 | c3 = a1 * R2 - a3 * R1; |
179 | row[0]= (c0 + c1) >> R_SHIFT; |
180 | row[1]= (c2 + c3) >> R_SHIFT; |
181 | row[2]= (c2 - c3) >> R_SHIFT; |
182 | row[3]= (c0 - c1) >> R_SHIFT; |
183 | } |
184 | |
/**
 * Inverse transform of a block 8 coefficients wide and 4 rows high
 * (row stride 8), adding the result to the pixels at dest.
 *
 * @param dest      top-left pixel of the 8x4 destination area
 * @param line_size distance in bytes between destination lines
 * @param block     coefficients; modified in place by the row pass
 */
void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* Horizontal pass: 8-point IDCT over each of the four rows. */
    for (row = 0; row < 4; row++)
        idctRowCondDC_8(&block[8 * row], 0);

    /* Vertical pass: 4-point IDCT per column, accumulated into dest. */
    for (col = 0; col < 8; col++)
        idct4col_add(&dest[col], line_size, &block[col]);
}
199 | |
/**
 * Inverse transform of a block 4 coefficients wide and 8 rows high
 * (row stride 8); the column pass is delegated to idctSparseColAdd_8(),
 * which presumably accumulates into dest -- confirm against the template.
 *
 * @param dest      top-left pixel of the 4x8 destination area
 * @param line_size distance in bytes between destination lines
 * @param block     coefficients; modified in place by the row pass
 */
void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* Horizontal pass: 4-point IDCT over the first four entries of each
     * of the eight rows. */
    for (row = 0; row < 8; row++)
        idct4row(&block[8 * row]);

    /* Vertical pass: 8-point IDCT on each of the four columns. */
    for (col = 0; col < 4; col++)
        idctSparseColAdd_8(&dest[col], line_size, &block[col]);
}
214 | |
/**
 * Inverse transform of a 4x4 block stored with a row stride of 8,
 * adding the result to the pixels at dest.
 *
 * @param dest      top-left pixel of the 4x4 destination area
 * @param line_size distance in bytes between destination lines
 * @param block     coefficients; modified in place by the row pass
 */
void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int row, col;

    /* Horizontal pass: 4-point IDCT over each of the four rows. */
    for (row = 0; row < 4; row++)
        idct4row(&block[8 * row]);

    /* Vertical pass: 4-point IDCT per column, accumulated into dest. */
    for (col = 0; col < 4; col++)
        idct4col_add(&dest[col], line_size, &block[col]);
}
229 | |
/**
 * Scale an 8x8 coefficient block by a quantisation matrix and run the
 * 10-bit "extrashift" row and column transforms on it, all in place.
 *
 * @param block 64 coefficients; overwritten with the transform output
 * @param qmat  64 multipliers, applied element-wise before the transform
 */
void ff_prores_idct(int16_t *block, const int16_t *qmat)
{
    int n;

    /* Element-wise scaling; the product is stored back as int16_t. */
    for (n = 0; n < 64; n++)
        block[n] = block[n] * qmat[n];

    /* Row pass over all eight rows, with 2 passed as second argument. */
    for (n = 0; n < 8; n++)
        idctRowCondDC_extrashift_10(&block[8 * n], 2);

    /* Column pass. 8192 is added to the top entry of each column right
     * before that column is transformed (presumably an output bias --
     * confirm against the template). */
    for (n = 0; n < 8; n++) {
        int16_t *column = block + n;

        column[0] += 8192;
        idctSparseCol_extrashift_10(column);
    }
}
245 |