blob: ae2f0e4c6f752f7f3dca8dbd4eed8df1d83fb618
1 | /* |
2 | * VP9 compatible video decoder |
3 | * |
4 | * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> |
5 | * Copyright (C) 2013 Clément Bœsch <u pkh me> |
6 | * |
7 | * This file is part of FFmpeg. |
8 | * |
9 | * FFmpeg is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2.1 of the License, or (at your option) any later version. |
13 | * |
14 | * FFmpeg is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with FFmpeg; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
22 | */ |
23 | |
24 | #include "libavutil/avassert.h" |
25 | |
26 | #include "avcodec.h" |
27 | #include "internal.h" |
28 | #include "videodsp.h" |
29 | #include "vp56.h" |
30 | #include "vp9.h" |
31 | #include "vp9data.h" |
32 | #include "vp9dec.h" |
33 | |
34 | static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, |
35 | ptrdiff_t stride, int v) |
36 | { |
37 | switch (w) { |
38 | case 1: |
39 | do { |
40 | *ptr = v; |
41 | ptr += stride; |
42 | } while (--h); |
43 | break; |
44 | case 2: { |
45 | int v16 = v * 0x0101; |
46 | do { |
47 | AV_WN16A(ptr, v16); |
48 | ptr += stride; |
49 | } while (--h); |
50 | break; |
51 | } |
52 | case 4: { |
53 | uint32_t v32 = v * 0x01010101; |
54 | do { |
55 | AV_WN32A(ptr, v32); |
56 | ptr += stride; |
57 | } while (--h); |
58 | break; |
59 | } |
60 | case 8: { |
61 | #if HAVE_FAST_64BIT |
62 | uint64_t v64 = v * 0x0101010101010101ULL; |
63 | do { |
64 | AV_WN64A(ptr, v64); |
65 | ptr += stride; |
66 | } while (--h); |
67 | #else |
68 | uint32_t v32 = v * 0x01010101; |
69 | do { |
70 | AV_WN32A(ptr, v32); |
71 | AV_WN32A(ptr + 4, v32); |
72 | ptr += stride; |
73 | } while (--h); |
74 | #endif |
75 | break; |
76 | } |
77 | } |
78 | } |
79 | |
/**
 * Decode all per-block mode information for the current block (s->row/s->col):
 * segment id, skip flag, intra/inter flag, transform size, intra prediction
 * modes or inter references/modes/MVs, and interpolation filter. Afterwards,
 * propagate the decoded state into the above/left context buffers and the
 * per-4x4 MV/reference planes of the current frame.
 *
 * All vp56_rac_*/vp8_rac_* calls below consume bits from the block's
 * arithmetic coder in bitstream order; the exact sequence of reads is
 * normative and must not be reordered.
 */
static void decode_mode(AVCodecContext *avctx)
{
    /* partition-context byte patterns stored into the left/above context
     * buffers for each block size (indexed by enum BlockSize) */
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    /* largest transform size permitted for each block size */
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    /* block dimensions in 8x8 units, clipped against the frame edge */
    int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
    int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
    /* neighbour availability: above within the frame, left within the tile */
    int have_a = row > 0, have_l = col > s->tile_col_start;
    int vref, filter_id;

    /* ---- segment id -------------------------------------------------- */
    if (!s->s.h.segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->s.h.keyframe || s->s.h.intraonly) {
        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
                    vp8_rac_get_tree(&s->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob);
    } else if (!s->s.h.segmentation.update_map ||
               (s->s.h.segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
                                                  s->left_segpred_ctx[row7]]))) {
        /* temporally predicted segment id: take the minimum id covering this
         * block in the reference frame's segmentation map */
        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
            int pred = 8, x;
            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;

            /* wait until the reference thread has decoded far enough for this
             * superblock row to have a valid segmentation map */
            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
            for (y = 0; y < h4; y++) {
                int idx_base = (y + row) * 8 * s->sb_cols + col;
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[idx_base + x]);
            }
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        /* remember that temporal prediction was used here */
        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        /* explicitly coded segment id */
        b->seg_id = vp8_rac_get_tree(&s->c, ff_vp9_segmentation_tree,
                                     s->s.h.segmentation.prob);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    /* write the segment id into the current frame's map for later frames */
    if (s->s.h.segmentation.enabled &&
        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  bw4, bh4, 8 * s->sb_cols, b->seg_id);
    }

    /* ---- skip flag --------------------------------------------------- */
    b->skip = s->s.h.segmentation.enabled &&
              s->s.h.segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }

    /* ---- intra/inter flag -------------------------------------------- */
    if (s->s.h.keyframe || s->s.h.intraonly) {
        b->intra = 1;
    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
        /* segment forces a fixed reference; ref_val == 0 means intra */
        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    /* ---- transform size ---------------------------------------------- */
    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
        int c;
        /* context from neighbours' tx sizes; skipped neighbours count as
         * using the maximum tx size */
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (s->left_skip_ctx[row7] ? max_tx :
                     s->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        /* unary-coded tx size, capped at max_tx for this block size */
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
    }

    if (s->s.h.keyframe || s->s.h.intraonly) {
        /* ---- keyframe/intra-only: y modes from neighbour-conditioned
         * default probabilities ---- */
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            /* sub-8x8 partitions: up to 4 separate y modes, each predicted
             * from the mode immediately above/left of that sub-block */
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] =
            a[0]       = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                          ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                              ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0]       =
                a[1]       = b->mode[1];
            } else {
                l[0]       =
                a[1]       =
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] =
                a[0]       = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                              ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                                  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1]       =
                    a[1]       = b->mode[3];
                } else {
                    l[1]       =
                    a[1]       =
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1]       =
                a[1]       =
                b->mode[3] = b->mode[1];
            }
        } else {
            /* one y mode covering the whole block */
            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                          ff_vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] =
            b->mode[2] =
            b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
        }
        /* chroma mode is conditioned on the last luma mode */
        b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                     ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        /* ---- intra block in an inter frame: modes use adaptive
         * probabilities and update the frame-level counts ---- */
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            /* probability set chosen by block-size group */
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] =
            b->mode[2] =
            b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        /* ---- inter block: references, inter modes, filter, MVs ---- */
        /* inter-mode context from the (above, left) neighbour modes */
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
            /* segment dictates the (single) reference */
            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
            b->comp    = 0;
            b->ref[0]  = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
                b->comp = s->s.h.comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                /* context for the compound-prediction flag, derived from
                 * whether neighbours were compound/intra/fixed-ref */
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->s.h.fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
                            /* NOTE(review): 'row & 7' here while every other
                             * access uses row7 -- presumably s->row7 == (row & 7)
                             * so they are equivalent; confirm before changing. */
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) { /* two references */
                /* one reference is fixed (fixcompref); only the variable one
                 * is coded. Its slot index depends on the sign bias. */
                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->s.h.fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->s.h.varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->s.h.varcompref[1] &&
                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->s.h.varcompref[1] &&
                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->s.h.varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                /* first bit: LAST (0) vs GOLDEN/ALTREF (1) */
                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        /* NOTE(review): unreachable -- the enclosing condition
                         * already guarantees !s->left_intra_ctx[row7] */
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    /* second bit: GOLDEN (0) vs ALTREF (1) */
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }

        /* ---- inter mode for 8x8-and-smaller blocks -------------------- */
        if (b->bs <= BS_8x8) {
            if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
                /* skip-enabled segment forces zero motion */
                b->mode[0] =
                b->mode[1] =
                b->mode[2] =
                b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] =
                b->mode[2] =
                b->mode[3] = b->mode[0];
                /* inter modes start at 10 (NEARESTMV) in the mode enum */
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        /* ---- interpolation filter ------------------------------------- */
        if (s->s.h.filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(&s->c, ff_vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = ff_vp9_filter_lut[filter_id];
        } else {
            b->filter = s->s.h.filtermode;
        }

        /* ---- motion vectors ------------------------------------------- */
        if (b->bs > BS_8x8) {
            /* sub-8x8: per-sub-block modes and MVs */
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            ff_vp9_fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                ff_vp9_fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                ff_vp9_fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    ff_vp9_fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            /* one MV pair for the whole block, replicated to all 4 slots */
            ff_vp9_fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }

        /* reference value propagated into the ref context buffers below */
        vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
    }

/* SPLAT_CTX(var, val, n): write n copies of the byte val starting at var,
 * using the widest aligned stores available on this target. */
#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
    case 16: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                         break; \
    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    case 16: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif

    /* ---- propagate decoded state into the above/left context rows ---- */
    switch (ff_vp9_bwh_tab[1][b->bs][0]) {
#define SET_CTXS(dir, off, n) \
    do { \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;
    }
    switch (ff_vp9_bwh_tab[1][b->bs][1]) {
    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
    }
#undef SPLAT_CTX
#undef SET_CTXS

    /* ---- MV context rows (inter frames only) -------------------------- */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }
    }

    /* ---- store refs/MVs into the current frame's per-4x4 planes ------- */
    // FIXME kinda ugly
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;
        VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] =
                mv[x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = b->ref[1];
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = -1;
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
            }
        }
    }
}
802 | |
803 | // FIXME merge cnt/eob arguments? |
// FIXME merge cnt/eob arguments?
/**
 * Decode the quantized coefficients of one transform block from the
 * range coder, in scan order, updating the adaptation counters.
 *
 * @param c               range coder to read tokens from
 * @param coef            output coefficient array; written at scan positions.
 *                        For the non-8bpp path each coefficient is stored as
 *                        32 bits (see STORE_COEF) -- presumably coef then
 *                        points at wider storage; confirm against callers.
 * @param n_coeffs        number of coefficients in this transform size
 * @param is_tx32x32      nonzero for 32x32 transforms (values are halved
 *                        after dequant, matching the spec's scaling)
 * @param is8bitsperpixel nonzero for 8-bit content (16-bit coef stores)
 * @param bpp             bits per component; 12 adds two extra magnitude bits
 *                        in the highest-category token
 * @param cnt             token-class counters [band][nnz-ctx][class]
 * @param eob             end-of-block counters [band][nnz-ctx][bit]
 * @param p               token probabilities [band][nnz-ctx][token];
 *                        entries [3..10] are lazily filled from the pareto
 *                        model table on first use
 * @param nnz             initial nonzero context from neighbouring blocks
 * @param scan            scan-order table (coefficient index per position)
 * @param nb              per-position pair of already-decoded neighbour
 *                        positions used to derive the next nnz context
 * @param band_counts     number of scan positions in each probability band
 * @param qmul            dequant factors: qmul[0] for DC (i == 0),
 *                        qmul[1] for all other positions
 * @return number of scan positions consumed (end-of-block position)
 */
static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    /* clamped token magnitude per coefficient position, used to derive the
     * nnz context for later positions; 1024 entries cover the largest
     * (presumably 32x32) transform -- only positions < n_coeffs are touched */
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            /* zero token: no eob flag is coded for the following position,
             * hence the goto that bypasses the eob read above */
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            /* next context = rounded-up average of the two neighbours */
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            // fill in p[3-10] (model fill) - only once per frame for each pos
            if (!tp[3])
                memcpy(&tp[3], ff_vp9_model_pareto8[tp[2]], 8);

            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    /* cat1: 1 extra bit, values 5-6 */
                    val = vp56_rac_get_prob(c, 159) + 5;
                } else {
                    /* cat2: 2 extra bits, values 7-10 */
                    val = (vp56_rac_get_prob(c, 165) << 1) + 7;
                    val += vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        /* cat3: 3 extra bits, values 11-18 */
                        val = 11 + (vp56_rac_get_prob(c, 173) << 2);
                        val += (vp56_rac_get_prob(c, 148) << 1);
                        val += vp56_rac_get_prob(c, 140);
                    } else {
                        /* cat4: 4 extra bits, values 19-34 */
                        val = 19 + (vp56_rac_get_prob(c, 176) << 3);
                        val += (vp56_rac_get_prob(c, 155) << 2);
                        val += (vp56_rac_get_prob(c, 140) << 1);
                        val += vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    /* cat5: 5 extra bits, values 35-66 */
                    val = (vp56_rac_get_prob(c, 180) << 4) + 35;
                    val += (vp56_rac_get_prob(c, 157) << 3);
                    val += (vp56_rac_get_prob(c, 141) << 2);
                    val += (vp56_rac_get_prob(c, 134) << 1);
                    val += vp56_rac_get_prob(c, 130);
                } else {
                    /* cat6: 14 extra bits (plus 2 at >8bpp, plus 2 more at
                     * 12bpp), values starting at 67 */
                    val = 67;
                    if (!is8bitsperpixel) {
                        if (bpp == 12) {
                            val += vp56_rac_get_prob(c, 255) << 17;
                            val += vp56_rac_get_prob(c, 255) << 16;
                        }
                        val += (vp56_rac_get_prob(c, 255) << 15);
                        val += (vp56_rac_get_prob(c, 255) << 14);
                    }
                    val += (vp56_rac_get_prob(c, 254) << 13);
                    val += (vp56_rac_get_prob(c, 254) << 12);
                    val += (vp56_rac_get_prob(c, 254) << 11);
                    val += (vp56_rac_get_prob(c, 252) << 10);
                    val += (vp56_rac_get_prob(c, 249) << 9);
                    val += (vp56_rac_get_prob(c, 243) << 8);
                    val += (vp56_rac_get_prob(c, 230) << 7);
                    val += (vp56_rac_get_prob(c, 196) << 6);
                    val += (vp56_rac_get_prob(c, 177) << 5);
                    val += (vp56_rac_get_prob(c, 153) << 4);
                    val += (vp56_rac_get_prob(c, 140) << 3);
                    val += (vp56_rac_get_prob(c, 133) << 2);
                    val += (vp56_rac_get_prob(c, 130) << 1);
                    val += vp56_rac_get_prob(c, 129);
                }
            }
        }
/* store one dequantized coefficient: 16-bit store for 8bpp, 32-bit store
 * (at a doubled index) for higher bit depths */
#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        c[i] = v; \
    } else { \
        AV_WN32A(&c[i * 2], v); \
    } \
} while (0)
        if (!--band_left)
            band_left = band_counts[++band];
        /* sign bit, then dequant (32x32 coefficients are halved) */
        if (is_tx32x32)
            STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
        else
            STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}
927 | |
928 | static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs, |
929 | unsigned (*cnt)[6][3], unsigned (*eob)[6][2], |
930 | uint8_t (*p)[6][11], int nnz, const int16_t *scan, |
931 | const int16_t (*nb)[2], const int16_t *band_counts, |
932 | const int16_t *qmul) |
933 | { |
934 | return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p, |
935 | nnz, scan, nb, band_counts, qmul); |
936 | } |
937 | |
938 | static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs, |
939 | unsigned (*cnt)[6][3], unsigned (*eob)[6][2], |
940 | uint8_t (*p)[6][11], int nnz, const int16_t *scan, |
941 | const int16_t (*nb)[2], const int16_t *band_counts, |
942 | const int16_t *qmul) |
943 | { |
944 | return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p, |
945 | nnz, scan, nb, band_counts, qmul); |
946 | } |
947 | |
948 | static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, |
949 | unsigned (*cnt)[6][3], unsigned (*eob)[6][2], |
950 | uint8_t (*p)[6][11], int nnz, const int16_t *scan, |
951 | const int16_t (*nb)[2], const int16_t *band_counts, |
952 | const int16_t *qmul) |
953 | { |
954 | return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p, |
955 | nnz, scan, nb, band_counts, qmul); |
956 | } |
957 | |
958 | static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, |
959 | unsigned (*cnt)[6][3], unsigned (*eob)[6][2], |
960 | uint8_t (*p)[6][11], int nnz, const int16_t *scan, |
961 | const int16_t (*nb)[2], const int16_t *band_counts, |
962 | const int16_t *qmul) |
963 | { |
964 | return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p, |
965 | nnz, scan, nb, band_counts, qmul); |
966 | } |
967 | |
/*
 * Parse the residual coefficients of the current block for the luma plane
 * and both chroma planes, writing end-of-block counts into s->eob /
 * s->uveob[] and updating the above/left non-zero-coefficient context
 * arrays as it goes.
 *
 * @param is8bitsperpixel compile-time constant selecting the 8bpp or
 *                        high-bit-depth coefficient decoder variants
 * @return nonzero if any plane had at least one coded coefficient
 */
static av_always_inline int decode_coeffs(AVCodecContext *avctx, int is8bitsperpixel)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
    // block dimensions in 4x4 units, and the portion inside the visible frame
    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, ret;
    int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
    // lossless mode uses a separate set of scan tables (offset by 4)
    int tx = 4 * s->s.h.lossless + b->tx;
    const int16_t * const *yscans = ff_vp9_scans[tx];
    const int16_t (* const *ynbs)[2] = ff_vp9_scans_nb[tx];
    const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
    // per-tx-size coefficient band sizes; the last entry is the remainder
    // of the block after the earlier bands (e.g. 16 - 13 for 4x4)
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4, 3, 16 - 13 },
        { 1, 2, 3, 4, 11, 64 - 21 },
        { 1, 2, 3, 4, 11, 256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
    int bytesperpixel = is8bitsperpixel ? 1 : 2;
    int total_coeff = 0;

/* MERGE: collapse the `step` per-4x4 nnz context bytes covering one
 * transform block into a single 0/1 value, using one aligned read. */
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)

/* Decode one luma transform block per inner iteration; `v` selects the
 * plain vs. 32x32 decoder variant via token pasting, and the nnz context
 * is the sum of the above (a[x]) and left (l[y]) flags. */
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                      (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
                       c, e, p, a[x] + l[y], yscans[txtp], \
                       ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!ret; \
            total_coeff |= !!ret; \
            if (step >= 4) { \
                AV_WN16A(&s->eob[n], ret); \
            } else { \
                s->eob[n] = ret; \
            } \
        } \
    }

/* SPLAT: broadcast the 0/1 result back over all `step` context bytes of
 * each transform block. `cond` is true when the block is not clipped by
 * the frame edge, permitting full-width aligned stores; otherwise only
 * the in-frame bytes are written via memset. */
#define SPLAT(la, end, step, cond) \
    if (step == 2) { \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
        if (cond) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        } \
    } else /* step == 8 */ { \
        if (cond) { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            } else { \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n], v32); \
                    AV_WN32A(&la[n + 4], v32); \
                } \
            } \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
        } \
    }
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)

    /* y tokens */
    switch (b->tx) {
    case TX_4X4:
        // for sub-8x8 blocks each 4x4 unit has its own prediction mode
        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
        break;
    case TX_8X8:
        MERGE_CTX(2, AV_RN16A);
        DECODE_Y_COEF_LOOP(2, 0,);
        SPLAT_CTX(2);
        break;
    case TX_16X16:
        MERGE_CTX(4, AV_RN32A);
        DECODE_Y_COEF_LOOP(4, 0,);
        SPLAT_CTX(4);
        break;
    case TX_32X32:
        MERGE_CTX(8, AV_RN64A);
        DECODE_Y_COEF_LOOP(8, 0, 32);
        SPLAT_CTX(8);
        break;
    }

/* Same as DECODE_Y_COEF_LOOP but for chroma plane `pl`: fixed DCT_DCT
 * scan order, chroma quantizers (qmul[1]), and per-plane eob storage. */
#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                      (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
                       16 * step * step, c, e, p, a[x] + l[y], \
                       uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!ret; \
            total_coeff |= !!ret; \
            if (step >= 4) { \
                AV_WN16A(&s->uveob[pl][n], ret); \
            } else { \
                s->uveob[pl][n] = ret; \
            } \
        } \
    }

    // switch to chroma probability/count tables and subsampled dimensions
    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    h4 >>= s->ss_v;
    end_y >>= s->ss_v;
    for (pl = 0; pl < 2; pl++) {
        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
        l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
        switch (b->uvtx) {
        case TX_4X4:
            DECODE_UV_COEF_LOOP(1,);
            break;
        case TX_8X8:
            MERGE_CTX(2, AV_RN16A);
            DECODE_UV_COEF_LOOP(2,);
            SPLAT_CTX(2);
            break;
        case TX_16X16:
            MERGE_CTX(4, AV_RN32A);
            DECODE_UV_COEF_LOOP(4,);
            SPLAT_CTX(4);
            break;
        case TX_32X32:
            MERGE_CTX(8, AV_RN64A);
            DECODE_UV_COEF_LOOP(8, 32);
            SPLAT_CTX(8);
            break;
        }
    }

    return total_coeff;
}
1134 | |
1135 | static int decode_coeffs_8bpp(AVCodecContext *avctx) |
1136 | { |
1137 | return decode_coeffs(avctx, 1); |
1138 | } |
1139 | |
1140 | static int decode_coeffs_16bpp(AVCodecContext *avctx) |
1141 | { |
1142 | return decode_coeffs(avctx, 0); |
1143 | } |
1144 | |
/*
 * Record which edges of the current block need loopfiltering, and at what
 * filter width, by ORing bitmasks into the per-superblock mask arrays.
 *
 * @param mask       mask[dir][row][size-class] bitmask array for one plane
 *                   set (luma or chroma); dir 0 appears to hold one edge
 *                   orientation and dir 1 the other — NOTE(review): confirm
 *                   exact dir/size-class semantics against the filter code
 * @param ss_h,ss_v  chroma subsampling shifts (both 0 when called for luma)
 * @param row_and_7  block position inside the 64x64 superblock, 8px units
 * @param col_and_7  ditto, horizontally
 * @param w,h        visible block size in 8px units (pre-clipped by caller)
 * @param col_end    nonzero remainder when the frame width ends inside this
 *                   block (chroma only); likewise row_end for the height
 * @param tx         transform size, which dictates edge spacing
 * @param skip_inter true for skipped inter blocks (no residual edges)
 */
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && (ss_v | ss_h)) {
        if (h == ss_v) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == ss_h) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        // m_col: bitmask of the 8px columns this block covers, anchored at col_and_7
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            // for odd lines, if the odd col is not being filtered,
            // skip odd row also:
            // .---. <-- a
            // |   |
            // |___| <-- b
            // ^   ^
            // c   d
            //
            // if a/c are even row/col and b/d are odd, and d is skipped,
            // e.g. right edge of size-66x66.webm, then skip b also (bug)
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                // drop the last column from the mask
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }
            if (!ss_h)
                mask[0][y][3] |= m_col;
            if (!ss_v) {
                if (ss_h && (col_end & 1))
                    mask[1][y][3] |= (t << (w - 1)) - t;
                else
                    mask[1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            int l2 = tx + ss_h - 1, step1d;
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            // (w ^ (w - 1)) == 1 is true iff w is odd
            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    mask[0][y][0] |= m_row_16;
                    mask[0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    mask[0][y][mask_id] |= m_row;
            }

            // same narrowing for horizontal edges when h is odd
            l2 = tx + ss_v - 1;
            step1d = 1 << l2;
            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    mask[1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    mask[1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    mask[1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            // skipped inter block: only the outer block boundary is filtered
            int mask_id;

            mask_id = (tx == TX_8X8) || (h == ss_v);
            mask[1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (w == ss_h);
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= t;
        } else {
            // skipped inter block at TX_4X4: boundary only, width chosen
            // by the wide-filter position masks
            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][2] |= t4;
                mask[0][y][1] |= t8;
            }
            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
        }
    }
}
1266 | |
/*
 * Decode one block at (row, col): parse modes and coefficients (pass < 2),
 * reconstruct pixels (except in pass 1, which parses only and returns
 * early), and set up loopfilter levels/edge masks for the block.
 *
 * @param lflvl  per-superblock loopfilter level/mask storage
 * @param yoff   byte offset of this block into the luma plane
 * @param uvoff  byte offset of this block into the chroma planes
 * @param bl,bp  block split level and partition, combined into a BlockSize
 */
void ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
                         VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                         enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    enum BlockSize bs = bl * 3 + bp;
    int bytesperpixel = s->bytesperpixel;
    // block dimensions in 8x8 units
    int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;

    s->row = row;
    s->row7 = row & 7;
    s->col = col;
    s->col7 = col & 7;

    // motion vector clamping window around this block's position
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;

    if (s->pass < 2) {
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(avctx);
        // chroma tx is one size smaller if the subsampled block can't hold
        // the luma transform
        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                           (s->ss_v && h4 * 2 == (1 << b->tx)));

        if (!b->skip) {
            int has_coeffs;

            if (bytesperpixel == 1) {
                has_coeffs = decode_coeffs_8bpp(avctx);
            } else {
                has_coeffs = decode_coeffs_16bpp(avctx);
            }
            // a small inter block with no coded coefficients is treated as
            // skipped from here on (contexts updated accordingly)
            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
                b->skip = 1;
                memset(&s->above_skip_ctx[col], 1, w4);
                memset(&s->left_skip_ctx[s->row7], 1, h4);
            }
        } else {
            int row7 = s->row7;

/* Zero `n` context bytes at once using the widest available store. */
#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1: v = 0; break; \
    case 2: AV_ZERO16(&v); break; \
    case 4: AV_ZERO32(&v); break; \
    case 8: AV_ZERO64(&v); break; \
    case 16: AV_ZERO128(&v); break; \
    }
/* Zero the y and uv nnz contexts for this block; chroma spans are halved
 * when the corresponding subsampling direction (dir2) is active. */
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)

            switch (w4) {
            case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
            case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
            case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
            case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
            }
            switch (h4) {
            case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
            case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
            case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
            case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
            }
        }

        // pass 1 only parses; advance the inter-pass buffers and stop here
        if (s->pass == 1) {
            s->b++;
            s->block += w4 * h4 * 64 * bytesperpixel;
            s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->eob += 4 * w4 * h4;
            s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
            s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);

            return;
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // makes it possible to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        // reconstruct into the temporary buffer (fixed 128-byte stride)
        s->dst[0] = s->tmp_y;
        s->y_stride = 128;
    } else {
        s->dst[0] = f->data[0] + yoff;
        s->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        s->dst[1] = s->tmp_uv[0];
        s->dst[2] = s->tmp_uv[1];
        s->uv_stride = 128;
    } else {
        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
        s->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        if (s->s.h.bpp > 8) {
            ff_vp9_intra_recon_16bpp(avctx, yoff, uvoff);
        } else {
            ff_vp9_intra_recon_8bpp(avctx, yoff, uvoff);
        }
    } else {
        if (s->s.h.bpp > 8) {
            ff_vp9_inter_recon_16bpp(avctx);
        } else {
            ff_vp9_inter_recon_8bpp(avctx);
        }
    }
    if (emu[0]) {
        // copy the visible part back from the temporary buffer, in
        // power-of-two-wide column strips (copy-only MC function)
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
                                         s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }
    if (emu[1]) {
        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;

        for (n = s->ss_h; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
                                         s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
                                         s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->s.h.filter.level &&
        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                        [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        if (s->ss_h || s->ss_v)
            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                       b->uvtx, skip_inter);

        // lazily fill the limit LUTs for this level (0 means "not computed")
        if (!s->filter_lut.lim_lut[lvl]) {
            int sharp = s->s.h.filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[lvl] = limit;
            s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    // pass 2 consumes the buffers that pass 1 produced; advance them
    if (s->pass == 2) {
        s->b++;
        s->block += w4 * h4 * 64 * bytesperpixel;
        s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->eob += 4 * w4 * h4;
        s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
        s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
    }
}
1468 |