blob: c60b003a31303c3e03b79f90f62093560ea84369
1 | /* |
2 | * Copyright (C) 2007 Marco Gerards <marco@gnu.org> |
3 | * Copyright (C) 2016 Open Broadcast Systems Ltd. |
4 | * Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | #include "libavutil/attributes.h" |
24 | #include "libavutil/mem.h" |
25 | #include "vc2enc_dwt.h" |
26 | |
27 | /* Since the transforms spit out interleaved coefficients, this function |
28 | * rearranges the coefficients into the more traditional subdivision, |
29 | * making it easier to encode and perform another level. */ |
30 | static av_always_inline void deinterleave(dwtcoef *linell, ptrdiff_t stride, |
31 | int width, int height, dwtcoef *synthl) |
32 | { |
33 | int x, y; |
34 | ptrdiff_t synthw = width << 1; |
35 | dwtcoef *linehl = linell + width; |
36 | dwtcoef *linelh = linell + height*stride; |
37 | dwtcoef *linehh = linelh + width; |
38 | |
39 | /* Deinterleave the coefficients. */ |
40 | for (y = 0; y < height; y++) { |
41 | for (x = 0; x < width; x++) { |
42 | linell[x] = synthl[(x << 1)]; |
43 | linehl[x] = synthl[(x << 1) + 1]; |
44 | linelh[x] = synthl[(x << 1) + synthw]; |
45 | linehh[x] = synthl[(x << 1) + synthw + 1]; |
46 | } |
47 | synthl += synthw << 1; |
48 | linell += stride; |
49 | linelh += stride; |
50 | linehl += stride; |
51 | linehh += stride; |
52 | } |
53 | } |
54 | |
55 | static void vc2_subband_dwt_97(VC2TransformContext *t, dwtcoef *data, |
56 | ptrdiff_t stride, int width, int height) |
57 | { |
58 | int x, y; |
59 | dwtcoef *datal = data, *synth = t->buffer, *synthl = synth; |
60 | const ptrdiff_t synth_width = width << 1; |
61 | const ptrdiff_t synth_height = height << 1; |
62 | |
63 | /* |
64 | * Shift in one bit that is used for additional precision and copy |
65 | * the data to the buffer. |
66 | */ |
67 | for (y = 0; y < synth_height; y++) { |
68 | for (x = 0; x < synth_width; x++) |
69 | synthl[x] = datal[x] << 1; |
70 | synthl += synth_width; |
71 | datal += stride; |
72 | } |
73 | |
74 | /* Horizontal synthesis. */ |
75 | synthl = synth; |
76 | for (y = 0; y < synth_height; y++) { |
77 | /* Lifting stage 2. */ |
78 | synthl[1] -= (8*synthl[0] + 9*synthl[2] - synthl[4] + 8) >> 4; |
79 | for (x = 1; x < width - 2; x++) |
80 | synthl[2*x + 1] -= (9*synthl[2*x] + 9*synthl[2*x + 2] - synthl[2*x + 4] - |
81 | synthl[2 * x - 2] + 8) >> 4; |
82 | synthl[synth_width - 1] -= (17*synthl[synth_width - 2] - |
83 | synthl[synth_width - 4] + 8) >> 4; |
84 | synthl[synth_width - 3] -= (8*synthl[synth_width - 2] + |
85 | 9*synthl[synth_width - 4] - |
86 | synthl[synth_width - 6] + 8) >> 4; |
87 | /* Lifting stage 1. */ |
88 | synthl[0] += (synthl[1] + synthl[1] + 2) >> 2; |
89 | for (x = 1; x < width - 1; x++) |
90 | synthl[2*x] += (synthl[2*x - 1] + synthl[2*x + 1] + 2) >> 2; |
91 | |
92 | synthl[synth_width - 2] += (synthl[synth_width - 3] + |
93 | synthl[synth_width - 1] + 2) >> 2; |
94 | synthl += synth_width; |
95 | } |
96 | |
97 | /* Vertical synthesis: Lifting stage 2. */ |
98 | synthl = synth + synth_width; |
99 | for (x = 0; x < synth_width; x++) |
100 | synthl[x] -= (8*synthl[x - synth_width] + 9*synthl[x + synth_width] - |
101 | synthl[x + 3 * synth_width] + 8) >> 4; |
102 | |
103 | synthl = synth + (synth_width << 1); |
104 | for (y = 1; y < height - 2; y++) { |
105 | for (x = 0; x < synth_width; x++) |
106 | synthl[x + synth_width] -= (9*synthl[x] + |
107 | 9*synthl[x + 2 * synth_width] - |
108 | synthl[x - 2 * synth_width] - |
109 | synthl[x + 4 * synth_width] + 8) >> 4; |
110 | synthl += synth_width << 1; |
111 | } |
112 | |
113 | synthl = synth + (synth_height - 1) * synth_width; |
114 | for (x = 0; x < synth_width; x++) { |
115 | synthl[x] -= (17*synthl[x - synth_width] - |
116 | synthl[x - 3*synth_width] + 8) >> 4; |
117 | synthl[x - 2*synth_width] -= (9*synthl[x - 3*synth_width] + |
118 | 8*synthl[x - 1*synth_width] - synthl[x - 5*synth_width] + 8) >> 4; |
119 | } |
120 | |
121 | /* Vertical synthesis: Lifting stage 1. */ |
122 | synthl = synth; |
123 | for (x = 0; x < synth_width; x++) |
124 | synthl[x] += (synthl[x + synth_width] + synthl[x + synth_width] + 2) >> 2; |
125 | |
126 | synthl = synth + (synth_width << 1); |
127 | for (y = 1; y < height - 1; y++) { |
128 | for (x = 0; x < synth_width; x++) |
129 | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
130 | synthl += synth_width << 1; |
131 | } |
132 | |
133 | synthl = synth + (synth_height - 2) * synth_width; |
134 | for (x = 0; x < synth_width; x++) |
135 | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
136 | |
137 | deinterleave(data, stride, width, height, synth); |
138 | } |
139 | |
140 | static void vc2_subband_dwt_53(VC2TransformContext *t, dwtcoef *data, |
141 | ptrdiff_t stride, int width, int height) |
142 | { |
143 | int x, y; |
144 | dwtcoef *synth = t->buffer, *synthl = synth, *datal = data; |
145 | const ptrdiff_t synth_width = width << 1; |
146 | const ptrdiff_t synth_height = height << 1; |
147 | |
148 | /* |
149 | * Shift in one bit that is used for additional precision and copy |
150 | * the data to the buffer. |
151 | */ |
152 | for (y = 0; y < synth_height; y++) { |
153 | for (x = 0; x < synth_width; x++) |
154 | synthl[x] = datal[x] << 1; |
155 | synthl += synth_width; |
156 | datal += stride; |
157 | } |
158 | |
159 | /* Horizontal synthesis. */ |
160 | synthl = synth; |
161 | for (y = 0; y < synth_height; y++) { |
162 | /* Lifting stage 2. */ |
163 | for (x = 0; x < width - 1; x++) |
164 | synthl[2 * x + 1] -= (synthl[2 * x] + synthl[2 * x + 2] + 1) >> 1; |
165 | |
166 | synthl[synth_width - 1] -= (2*synthl[synth_width - 2] + 1) >> 1; |
167 | |
168 | /* Lifting stage 1. */ |
169 | synthl[0] += (2*synthl[1] + 2) >> 2; |
170 | for (x = 1; x < width - 1; x++) |
171 | synthl[2 * x] += (synthl[2 * x - 1] + synthl[2 * x + 1] + 2) >> 2; |
172 | |
173 | synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2; |
174 | |
175 | synthl += synth_width; |
176 | } |
177 | |
178 | /* Vertical synthesis: Lifting stage 2. */ |
179 | synthl = synth + synth_width; |
180 | for (x = 0; x < synth_width; x++) |
181 | synthl[x] -= (synthl[x - synth_width] + synthl[x + synth_width] + 1) >> 1; |
182 | |
183 | synthl = synth + (synth_width << 1); |
184 | for (y = 1; y < height - 1; y++) { |
185 | for (x = 0; x < synth_width; x++) |
186 | synthl[x + synth_width] -= (synthl[x] + synthl[x + synth_width * 2] + 1) >> 1; |
187 | synthl += (synth_width << 1); |
188 | } |
189 | |
190 | synthl = synth + (synth_height - 1) * synth_width; |
191 | for (x = 0; x < synth_width; x++) |
192 | synthl[x] -= (2*synthl[x - synth_width] + 1) >> 1; |
193 | |
194 | /* Vertical synthesis: Lifting stage 1. */ |
195 | synthl = synth; |
196 | for (x = 0; x < synth_width; x++) |
197 | synthl[x] += (2*synthl[synth_width + x] + 2) >> 2; |
198 | |
199 | synthl = synth + (synth_width << 1); |
200 | for (y = 1; y < height - 1; y++) { |
201 | for (x = 0; x < synth_width; x++) |
202 | synthl[x] += (synthl[x + synth_width] + synthl[x - synth_width] + 2) >> 2; |
203 | synthl += (synth_width << 1); |
204 | } |
205 | |
206 | synthl = synth + (synth_height - 2)*synth_width; |
207 | for (x = 0; x < synth_width; x++) |
208 | synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2; |
209 | |
210 | |
211 | deinterleave(data, stride, width, height, synth); |
212 | } |
213 | |
214 | static av_always_inline void dwt_haar(VC2TransformContext *t, dwtcoef *data, |
215 | ptrdiff_t stride, int width, int height, |
216 | const int s) |
217 | { |
218 | int x, y; |
219 | dwtcoef *synth = t->buffer, *synthl = synth, *datal = data; |
220 | const ptrdiff_t synth_width = width << 1; |
221 | const ptrdiff_t synth_height = height << 1; |
222 | |
223 | /* Horizontal synthesis. */ |
224 | for (y = 0; y < synth_height; y++) { |
225 | for (x = 0; x < synth_width; x += 2) { |
226 | synthl[y*synth_width + x + 1] = (datal[y*stride + x + 1] << s) - |
227 | (datal[y*stride + x] << s); |
228 | synthl[y*synth_width + x] = (datal[y*stride + x + 0] << s) + |
229 | ((synthl[y*synth_width + x + 1] + 1) >> 1); |
230 | } |
231 | } |
232 | |
233 | /* Vertical synthesis. */ |
234 | for (x = 0; x < synth_width; x++) { |
235 | for (y = 0; y < synth_height; y += 2) { |
236 | synthl[(y + 1)*synth_width + x] = synthl[(y + 1)*synth_width + x] - |
237 | synthl[y*synth_width + x]; |
238 | synthl[y*synth_width + x] = synthl[y*synth_width + x] + |
239 | ((synthl[(y + 1)*synth_width + x] + 1) >> 1); |
240 | } |
241 | } |
242 | |
243 | deinterleave(data, stride, width, height, synth); |
244 | } |
245 | |
246 | static void vc2_subband_dwt_haar(VC2TransformContext *t, dwtcoef *data, |
247 | ptrdiff_t stride, int width, int height) |
248 | { |
249 | dwt_haar(t, data, stride, width, height, 0); |
250 | } |
251 | |
252 | static void vc2_subband_dwt_haar_shift(VC2TransformContext *t, dwtcoef *data, |
253 | ptrdiff_t stride, int width, int height) |
254 | { |
255 | dwt_haar(t, data, stride, width, height, 1); |
256 | } |
257 | |
258 | av_cold int ff_vc2enc_init_transforms(VC2TransformContext *s, int p_width, int p_height) |
259 | { |
260 | s->vc2_subband_dwt[VC2_TRANSFORM_9_7] = vc2_subband_dwt_97; |
261 | s->vc2_subband_dwt[VC2_TRANSFORM_5_3] = vc2_subband_dwt_53; |
262 | s->vc2_subband_dwt[VC2_TRANSFORM_HAAR] = vc2_subband_dwt_haar; |
263 | s->vc2_subband_dwt[VC2_TRANSFORM_HAAR_S] = vc2_subband_dwt_haar_shift; |
264 | |
265 | s->buffer = av_malloc(2*p_width*p_height*sizeof(dwtcoef)); |
266 | if (!s->buffer) |
267 | return 1; |
268 | |
269 | return 0; |
270 | } |
271 | |
272 | av_cold void ff_vc2enc_free_transforms(VC2TransformContext *s) |
273 | { |
274 | av_freep(&s->buffer); |
275 | } |
276 |