blob: 025776bac9c1fcc873f911bc576c85c914ab5c84
1 | /* |
2 | * VC-1 and WMV3 decoder |
3 | * Copyright (c) 2011 Mashiat Sarker Shakkhar |
4 | * Copyright (c) 2006-2007 Konstantin Shishkov |
5 | * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer |
6 | * |
7 | * This file is part of FFmpeg. |
8 | * |
9 | * FFmpeg is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2.1 of the License, or (at your option) any later version. |
13 | * |
14 | * FFmpeg is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with FFmpeg; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | */ |
23 | |
24 | /** |
25 | * @file |
26 | * VC-1 and WMV3 loopfilter |
27 | */ |
28 | |
29 | #include "avcodec.h" |
30 | #include "mpegvideo.h" |
31 | #include "vc1.h" |
32 | #include "vc1dsp.h" |
33 | |
34 | void ff_vc1_loop_filter_iblk(VC1Context *v, int pq) |
35 | { |
36 | MpegEncContext *s = &v->s; |
37 | int j; |
38 | if (!s->first_slice_line) { |
39 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); |
40 | if (s->mb_x) |
41 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
42 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); |
43 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) |
44 | for (j = 0; j < 2; j++) { |
45 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq); |
46 | if (s->mb_x) |
47 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
48 | } |
49 | } |
50 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq); |
51 | |
52 | if (s->mb_y == s->end_mb_y - 1) { |
53 | if (s->mb_x) { |
54 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); |
55 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) { |
56 | v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); |
57 | v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); |
58 | } |
59 | } |
60 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); |
61 | } |
62 | } |
63 | |
64 | void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq) |
65 | { |
66 | MpegEncContext *s = &v->s; |
67 | int j; |
68 | |
69 | /* The loopfilter runs 1 row and 1 column behind the overlap filter, which |
70 | * means it runs two rows/cols behind the decoding loop. */ |
71 | if (!s->first_slice_line) { |
72 | if (s->mb_x) { |
73 | if (s->mb_y >= s->start_mb_y + 2) { |
74 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); |
75 | |
76 | if (s->mb_x >= 2) |
77 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq); |
78 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq); |
79 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) |
80 | for (j = 0; j < 2; j++) { |
81 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); |
82 | if (s->mb_x >= 2) { |
83 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq); |
84 | } |
85 | } |
86 | } |
87 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq); |
88 | } |
89 | |
90 | if (s->mb_x == s->mb_width - 1) { |
91 | if (s->mb_y >= s->start_mb_y + 2) { |
92 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
93 | |
94 | if (s->mb_x) |
95 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq); |
96 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq); |
97 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) |
98 | for (j = 0; j < 2; j++) { |
99 | v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
100 | if (s->mb_x >= 2) { |
101 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq); |
102 | } |
103 | } |
104 | } |
105 | v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq); |
106 | } |
107 | |
108 | if (s->mb_y == s->end_mb_y) { |
109 | if (s->mb_x) { |
110 | if (s->mb_x >= 2) |
111 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq); |
112 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq); |
113 | if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) { |
114 | for (j = 0; j < 2; j++) { |
115 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq); |
116 | } |
117 | } |
118 | } |
119 | |
120 | if (s->mb_x == s->mb_width - 1) { |
121 | if (s->mb_x) |
122 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); |
123 | v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); |
124 | if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) { |
125 | for (j = 0; j < 2; j++) { |
126 | v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); |
127 | } |
128 | } |
129 | } |
130 | } |
131 | } |
132 | } |
133 | |
134 | void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v) |
135 | { |
136 | MpegEncContext *s = &v->s; |
137 | int mb_pos; |
138 | |
139 | if (v->condover == CONDOVER_NONE) |
140 | return; |
141 | |
142 | mb_pos = s->mb_x + s->mb_y * s->mb_stride; |
143 | |
144 | /* Within a MB, the horizontal overlap always runs before the vertical. |
145 | * To accomplish that, we run the H on left and internal borders of the |
146 | * currently decoded MB. Then, we wait for the next overlap iteration |
147 | * to do H overlap on the right edge of this MB, before moving over and |
148 | * running the V overlap. Therefore, the V overlap makes us trail by one |
149 | * MB col and the H overlap filter makes us trail by one MB row. This |
150 | * is reflected in the time at which we run the put_pixels loop. */ |
151 | if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) { |
152 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
153 | v->over_flags_plane[mb_pos - 1])) { |
154 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1], |
155 | v->block[v->cur_blk_idx][0]); |
156 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3], |
157 | v->block[v->cur_blk_idx][2]); |
158 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) { |
159 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4], |
160 | v->block[v->cur_blk_idx][4]); |
161 | v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5], |
162 | v->block[v->cur_blk_idx][5]); |
163 | } |
164 | } |
165 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0], |
166 | v->block[v->cur_blk_idx][1]); |
167 | v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2], |
168 | v->block[v->cur_blk_idx][3]); |
169 | |
170 | if (s->mb_x == s->mb_width - 1) { |
171 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
172 | v->over_flags_plane[mb_pos - s->mb_stride])) { |
173 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2], |
174 | v->block[v->cur_blk_idx][0]); |
175 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3], |
176 | v->block[v->cur_blk_idx][1]); |
177 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) { |
178 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4], |
179 | v->block[v->cur_blk_idx][4]); |
180 | v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5], |
181 | v->block[v->cur_blk_idx][5]); |
182 | } |
183 | } |
184 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0], |
185 | v->block[v->cur_blk_idx][2]); |
186 | v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1], |
187 | v->block[v->cur_blk_idx][3]); |
188 | } |
189 | } |
190 | if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) { |
191 | if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 || |
192 | v->over_flags_plane[mb_pos - s->mb_stride - 1])) { |
193 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2], |
194 | v->block[v->left_blk_idx][0]); |
195 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3], |
196 | v->block[v->left_blk_idx][1]); |
197 | if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) { |
198 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4], |
199 | v->block[v->left_blk_idx][4]); |
200 | v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5], |
201 | v->block[v->left_blk_idx][5]); |
202 | } |
203 | } |
204 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0], |
205 | v->block[v->left_blk_idx][2]); |
206 | v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1], |
207 | v->block[v->left_blk_idx][3]); |
208 | } |
209 | } |
210 | |
211 | static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num) |
212 | { |
213 | MpegEncContext *s = &v->s; |
214 | int mb_cbp = v->cbp[s->mb_x - s->mb_stride], |
215 | block_cbp = mb_cbp >> (block_num * 4), bottom_cbp, |
216 | mb_is_intra = v->is_intra[s->mb_x - s->mb_stride], |
217 | block_is_intra = mb_is_intra >> block_num, bottom_is_intra; |
218 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; |
219 | uint8_t *dst; |
220 | |
221 | if (block_num > 3) { |
222 | dst = s->dest[block_num - 3]; |
223 | } else { |
224 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize; |
225 | } |
226 | if (s->mb_y != s->end_mb_y || block_num < 2) { |
227 | int16_t (*mv)[2]; |
228 | int mv_stride; |
229 | |
230 | if (block_num > 3) { |
231 | bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4); |
232 | bottom_is_intra = v->is_intra[s->mb_x] >> block_num; |
233 | mv = &v->luma_mv[s->mb_x - s->mb_stride]; |
234 | mv_stride = s->mb_stride; |
235 | } else { |
236 | bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4)) |
237 | : (v->cbp[s->mb_x] >> ((block_num - 2) * 4)); |
238 | bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2)) |
239 | : (v->is_intra[s->mb_x] >> (block_num - 2)); |
240 | mv_stride = s->b8_stride; |
241 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride]; |
242 | } |
243 | |
244 | if (bottom_is_intra & 1 || block_is_intra & 1 || |
245 | mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) { |
246 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
247 | } else { |
248 | idx = ((bottom_cbp >> 2) | block_cbp) & 3; |
249 | if (idx == 3) { |
250 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
251 | } else if (idx) { |
252 | if (idx == 1) |
253 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); |
254 | else |
255 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); |
256 | } |
257 | } |
258 | } |
259 | |
260 | dst -= 4 * linesize; |
261 | ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF; |
262 | if (ttblk == TT_4X4 || ttblk == TT_8X4) { |
263 | idx = (block_cbp | (block_cbp >> 2)) & 3; |
264 | if (idx == 3) { |
265 | v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); |
266 | } else if (idx) { |
267 | if (idx == 1) |
268 | v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq); |
269 | else |
270 | v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq); |
271 | } |
272 | } |
273 | } |
274 | |
275 | static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num) |
276 | { |
277 | MpegEncContext *s = &v->s; |
278 | int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride], |
279 | block_cbp = mb_cbp >> (block_num * 4), right_cbp, |
280 | mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride], |
281 | block_is_intra = mb_is_intra >> block_num, right_is_intra; |
282 | int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk; |
283 | uint8_t *dst; |
284 | |
285 | if (block_num > 3) { |
286 | dst = s->dest[block_num - 3] - 8 * linesize; |
287 | } else { |
288 | dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8; |
289 | } |
290 | |
291 | if (s->mb_x != s->mb_width || !(block_num & 5)) { |
292 | int16_t (*mv)[2]; |
293 | |
294 | if (block_num > 3) { |
295 | right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4); |
296 | right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num; |
297 | mv = &v->luma_mv[s->mb_x - s->mb_stride - 1]; |
298 | } else { |
299 | right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4)) |
300 | : (mb_cbp >> ((block_num + 1) * 4)); |
301 | right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1)) |
302 | : (mb_is_intra >> (block_num + 1)); |
303 | mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2]; |
304 | } |
305 | if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) { |
306 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
307 | } else { |
308 | idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check |
309 | if (idx == 5) { |
310 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
311 | } else if (idx) { |
312 | if (idx == 1) |
313 | v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq); |
314 | else |
315 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); |
316 | } |
317 | } |
318 | } |
319 | |
320 | dst -= 4; |
321 | ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf; |
322 | if (ttblk == TT_4X4 || ttblk == TT_4X8) { |
323 | idx = (block_cbp | (block_cbp >> 1)) & 5; |
324 | if (idx == 5) { |
325 | v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); |
326 | } else if (idx) { |
327 | if (idx == 1) |
328 | v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq); |
329 | else |
330 | v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq); |
331 | } |
332 | } |
333 | } |
334 | |
335 | void ff_vc1_apply_p_loop_filter(VC1Context *v) |
336 | { |
337 | MpegEncContext *s = &v->s; |
338 | int i; |
339 | int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6; |
340 | |
341 | for (i = 0; i < block_count; i++) { |
342 | vc1_apply_p_v_loop_filter(v, i); |
343 | } |
344 | |
345 | /* V always precedes H, therefore we run H one MB before V; |
346 | * at the end of a row, we catch up to complete the row */ |
347 | if (s->mb_x) { |
348 | for (i = 0; i < block_count; i++) { |
349 | vc1_apply_p_h_loop_filter(v, i); |
350 | } |
351 | if (s->mb_x == s->mb_width - 1) { |
352 | s->mb_x++; |
353 | ff_update_block_index(s); |
354 | for (i = 0; i < block_count; i++) { |
355 | vc1_apply_p_h_loop_filter(v, i); |
356 | } |
357 | } |
358 | } |
359 | } |
360 |