blob: fd7bd98afd312f0b1838d48f3bff72e9ef567138
1 | /* |
2 | * DXVA2 VP9 HW acceleration. |
3 | * |
4 | * copyright (c) 2015 Hendrik Leppkes |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | #include "libavutil/avassert.h" |
24 | #include "libavutil/pixdesc.h" |
25 | |
26 | #include "vp9shared.h" |
27 | |
28 | // The headers above may include w32threads.h, which uses the original |
29 | // _WIN32_WINNT define, while dxva2_internal.h redefines it to target a |
30 | // potentially newer version. |
31 | #include "dxva2_internal.h" |
32 | |
/* Per-frame private state kept by this hwaccel between start_frame and end_frame. */
struct vp9_dxva2_picture_context {
    DXVA_PicParams_VP9    pp;             /* picture parameters, filled in start_frame */
    DXVA_Slice_VPx_Short  slice;          /* slice control data, filled in decode_slice */
    const uint8_t        *bitstream;      /* start of the first buffer passed to decode_slice (not owned here) */
    unsigned              bitstream_size; /* running total of the sizes passed to decode_slice */
};
39 | |
40 | static void fill_picture_entry(DXVA_PicEntry_VPx *pic, |
41 | unsigned index, unsigned flag) |
42 | { |
43 | av_assert0((index & 0x7f) == index && (flag & 0x01) == flag); |
44 | pic->bPicEntry = index | (flag << 7); |
45 | } |
46 | |
47 | static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const VP9SharedContext *h, |
48 | DXVA_PicParams_VP9 *pp) |
49 | { |
50 | int i; |
51 | const AVPixFmtDescriptor * pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); |
52 | |
53 | if (!pixdesc) |
54 | return -1; |
55 | |
56 | memset(pp, 0, sizeof(*pp)); |
57 | |
58 | fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f), 0); |
59 | |
60 | pp->profile = h->h.profile; |
61 | pp->wFormatAndPictureInfoFlags = ((h->h.keyframe == 0) << 0) | |
62 | ((h->h.invisible == 0) << 1) | |
63 | (h->h.errorres << 2) | |
64 | (pixdesc->log2_chroma_w << 3) | /* subsampling_x */ |
65 | (pixdesc->log2_chroma_h << 4) | /* subsampling_y */ |
66 | (0 << 5) | /* extra_plane */ |
67 | (h->h.refreshctx << 6) | |
68 | (h->h.parallelmode << 7) | |
69 | (h->h.intraonly << 8) | |
70 | (h->h.framectxid << 9) | |
71 | (h->h.resetctx << 11) | |
72 | ((h->h.keyframe ? 0 : h->h.highprecisionmvs) << 13) | |
73 | (0 << 14); /* ReservedFormatInfo2Bits */ |
74 | |
75 | pp->width = avctx->width; |
76 | pp->height = avctx->height; |
77 | pp->BitDepthMinus8Luma = pixdesc->comp[0].depth - 8; |
78 | pp->BitDepthMinus8Chroma = pixdesc->comp[1].depth - 8; |
79 | /* swap 0/1 to match the reference */ |
80 | pp->interp_filter = h->h.filtermode ^ (h->h.filtermode <= 1); |
81 | pp->Reserved8Bits = 0; |
82 | |
83 | for (i = 0; i < 8; i++) { |
84 | if (h->refs[i].f->buf[0]) { |
85 | fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f), 0); |
86 | pp->ref_frame_coded_width[i] = h->refs[i].f->width; |
87 | pp->ref_frame_coded_height[i] = h->refs[i].f->height; |
88 | } else |
89 | pp->ref_frame_map[i].bPicEntry = 0xFF; |
90 | } |
91 | |
92 | for (i = 0; i < 3; i++) { |
93 | uint8_t refidx = h->h.refidx[i]; |
94 | if (h->refs[refidx].f->buf[0]) |
95 | fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f), 0); |
96 | else |
97 | pp->frame_refs[i].bPicEntry = 0xFF; |
98 | |
99 | pp->ref_frame_sign_bias[i + 1] = h->h.signbias[i]; |
100 | } |
101 | |
102 | pp->filter_level = h->h.filter.level; |
103 | pp->sharpness_level = h->h.filter.sharpness; |
104 | |
105 | pp->wControlInfoFlags = (h->h.lf_delta.enabled << 0) | |
106 | (h->h.lf_delta.updated << 1) | |
107 | (h->h.use_last_frame_mvs << 2) | |
108 | (0 << 3); /* ReservedControlInfo5Bits */ |
109 | |
110 | for (i = 0; i < 4; i++) |
111 | pp->ref_deltas[i] = h->h.lf_delta.ref[i]; |
112 | |
113 | for (i = 0; i < 2; i++) |
114 | pp->mode_deltas[i] = h->h.lf_delta.mode[i]; |
115 | |
116 | pp->base_qindex = h->h.yac_qi; |
117 | pp->y_dc_delta_q = h->h.ydc_qdelta; |
118 | pp->uv_dc_delta_q = h->h.uvdc_qdelta; |
119 | pp->uv_ac_delta_q = h->h.uvac_qdelta; |
120 | |
121 | /* segmentation data */ |
122 | pp->stVP9Segments.wSegmentInfoFlags = (h->h.segmentation.enabled << 0) | |
123 | (h->h.segmentation.update_map << 1) | |
124 | (h->h.segmentation.temporal << 2) | |
125 | (h->h.segmentation.absolute_vals << 3) | |
126 | (0 << 4); /* ReservedSegmentFlags4Bits */ |
127 | |
128 | for (i = 0; i < 7; i++) |
129 | pp->stVP9Segments.tree_probs[i] = h->h.segmentation.prob[i]; |
130 | |
131 | if (h->h.segmentation.temporal) |
132 | for (i = 0; i < 3; i++) |
133 | pp->stVP9Segments.pred_probs[i] = h->h.segmentation.pred_prob[i]; |
134 | else |
135 | memset(pp->stVP9Segments.pred_probs, 255, sizeof(pp->stVP9Segments.pred_probs)); |
136 | |
137 | for (i = 0; i < 8; i++) { |
138 | pp->stVP9Segments.feature_mask[i] = (h->h.segmentation.feat[i].q_enabled << 0) | |
139 | (h->h.segmentation.feat[i].lf_enabled << 1) | |
140 | (h->h.segmentation.feat[i].ref_enabled << 2) | |
141 | (h->h.segmentation.feat[i].skip_enabled << 3); |
142 | |
143 | pp->stVP9Segments.feature_data[i][0] = h->h.segmentation.feat[i].q_val; |
144 | pp->stVP9Segments.feature_data[i][1] = h->h.segmentation.feat[i].lf_val; |
145 | pp->stVP9Segments.feature_data[i][2] = h->h.segmentation.feat[i].ref_val; |
146 | pp->stVP9Segments.feature_data[i][3] = 0; /* no data for skip */ |
147 | } |
148 | |
149 | pp->log2_tile_cols = h->h.tiling.log2_tile_cols; |
150 | pp->log2_tile_rows = h->h.tiling.log2_tile_rows; |
151 | |
152 | pp->uncompressed_header_size_byte_aligned = h->h.uncompressed_header_size; |
153 | pp->first_partition_size = h->h.compressed_header_size; |
154 | |
155 | pp->StatusReportFeedbackNumber = 1 + DXVA_CONTEXT_REPORT_ID(avctx, ctx)++; |
156 | return 0; |
157 | } |
158 | |
159 | static void fill_slice_short(DXVA_Slice_VPx_Short *slice, |
160 | unsigned position, unsigned size) |
161 | { |
162 | memset(slice, 0, sizeof(*slice)); |
163 | slice->BSNALunitDataLocation = position; |
164 | slice->SliceBytesInBuffer = size; |
165 | slice->wBadSliceChopping = 0; |
166 | } |
167 | |
168 | static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx, |
169 | DECODER_BUFFER_DESC *bs, |
170 | DECODER_BUFFER_DESC *sc) |
171 | { |
172 | const VP9SharedContext *h = avctx->priv_data; |
173 | AVDXVAContext *ctx = avctx->hwaccel_context; |
174 | struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private; |
175 | void *dxva_data_ptr; |
176 | uint8_t *dxva_data; |
177 | unsigned dxva_size; |
178 | unsigned padding; |
179 | unsigned type; |
180 | |
181 | #if CONFIG_D3D11VA |
182 | if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) { |
183 | type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM; |
184 | if (FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, |
185 | D3D11VA_CONTEXT(ctx)->decoder, |
186 | type, |
187 | &dxva_size, &dxva_data_ptr))) |
188 | return -1; |
189 | } |
190 | #endif |
191 | #if CONFIG_DXVA2 |
192 | if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { |
193 | type = DXVA2_BitStreamDateBufferType; |
194 | if (FAILED(IDirectXVideoDecoder_GetBuffer(DXVA2_CONTEXT(ctx)->decoder, |
195 | type, |
196 | &dxva_data_ptr, &dxva_size))) |
197 | return -1; |
198 | } |
199 | #endif |
200 | |
201 | dxva_data = dxva_data_ptr; |
202 | |
203 | if (ctx_pic->slice.SliceBytesInBuffer > dxva_size) { |
204 | av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream"); |
205 | return -1; |
206 | } |
207 | |
208 | memcpy(dxva_data, ctx_pic->bitstream, ctx_pic->slice.SliceBytesInBuffer); |
209 | |
210 | padding = FFMIN(128 - ((ctx_pic->slice.SliceBytesInBuffer) & 127), dxva_size - ctx_pic->slice.SliceBytesInBuffer); |
211 | if (padding > 0) { |
212 | memset(dxva_data + ctx_pic->slice.SliceBytesInBuffer, 0, padding); |
213 | ctx_pic->slice.SliceBytesInBuffer += padding; |
214 | } |
215 | |
216 | #if CONFIG_D3D11VA |
217 | if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) |
218 | if (FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder, type))) |
219 | return -1; |
220 | #endif |
221 | #if CONFIG_DXVA2 |
222 | if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) |
223 | if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(DXVA2_CONTEXT(ctx)->decoder, type))) |
224 | return -1; |
225 | #endif |
226 | |
227 | #if CONFIG_D3D11VA |
228 | if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) { |
229 | D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs; |
230 | memset(dsc11, 0, sizeof(*dsc11)); |
231 | dsc11->BufferType = type; |
232 | dsc11->DataSize = ctx_pic->slice.SliceBytesInBuffer; |
233 | dsc11->NumMBsInBuffer = 0; |
234 | |
235 | type = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL; |
236 | } |
237 | #endif |
238 | #if CONFIG_DXVA2 |
239 | if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) { |
240 | DXVA2_DecodeBufferDesc *dsc2 = bs; |
241 | memset(dsc2, 0, sizeof(*dsc2)); |
242 | dsc2->CompressedBufferType = type; |
243 | dsc2->DataSize = ctx_pic->slice.SliceBytesInBuffer; |
244 | dsc2->NumMBsInBuffer = 0; |
245 | |
246 | type = DXVA2_SliceControlBufferType; |
247 | } |
248 | #endif |
249 | |
250 | return ff_dxva2_commit_buffer(avctx, ctx, sc, |
251 | type, |
252 | &ctx_pic->slice, sizeof(ctx_pic->slice), 0); |
253 | } |
254 | |
255 | |
256 | static int dxva2_vp9_start_frame(AVCodecContext *avctx, |
257 | av_unused const uint8_t *buffer, |
258 | av_unused uint32_t size) |
259 | { |
260 | const VP9SharedContext *h = avctx->priv_data; |
261 | AVDXVAContext *ctx = avctx->hwaccel_context; |
262 | struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private; |
263 | |
264 | if (!DXVA_CONTEXT_VALID(avctx, ctx)) |
265 | return -1; |
266 | av_assert0(ctx_pic); |
267 | |
268 | /* Fill up DXVA_PicParams_VP9 */ |
269 | if (fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp) < 0) |
270 | return -1; |
271 | |
272 | ctx_pic->bitstream_size = 0; |
273 | ctx_pic->bitstream = NULL; |
274 | return 0; |
275 | } |
276 | |
277 | static int dxva2_vp9_decode_slice(AVCodecContext *avctx, |
278 | const uint8_t *buffer, |
279 | uint32_t size) |
280 | { |
281 | const VP9SharedContext *h = avctx->priv_data; |
282 | struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private; |
283 | unsigned position; |
284 | |
285 | if (!ctx_pic->bitstream) |
286 | ctx_pic->bitstream = buffer; |
287 | ctx_pic->bitstream_size += size; |
288 | |
289 | position = buffer - ctx_pic->bitstream; |
290 | fill_slice_short(&ctx_pic->slice, position, size); |
291 | |
292 | return 0; |
293 | } |
294 | |
295 | static int dxva2_vp9_end_frame(AVCodecContext *avctx) |
296 | { |
297 | VP9SharedContext *h = avctx->priv_data; |
298 | struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private; |
299 | int ret; |
300 | |
301 | if (ctx_pic->bitstream_size <= 0) |
302 | return -1; |
303 | |
304 | ret = ff_dxva2_common_end_frame(avctx, h->frames[CUR_FRAME].tf.f, |
305 | &ctx_pic->pp, sizeof(ctx_pic->pp), |
306 | NULL, 0, |
307 | commit_bitstream_and_slice_buffer); |
308 | return ret; |
309 | } |
310 | |
#if CONFIG_VP9_DXVA2_HWACCEL
/* VP9 hwaccel descriptor for AV_PIX_FMT_DXVA2_VLD surfaces. */
AVHWAccel ff_vp9_dxva2_hwaccel = {
    /* identification */
    .name                 = "vp9_dxva2",
    .id                   = AV_CODEC_ID_VP9,
    .type                 = AVMEDIA_TYPE_VIDEO,
    .pix_fmt              = AV_PIX_FMT_DXVA2_VLD,

    /* per-frame private storage */
    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context),

    /* callbacks */
    .start_frame          = dxva2_vp9_start_frame,
    .decode_slice         = dxva2_vp9_decode_slice,
    .end_frame            = dxva2_vp9_end_frame,
};
#endif
323 | |
#if CONFIG_VP9_D3D11VA_HWACCEL
/* VP9 hwaccel descriptor for AV_PIX_FMT_D3D11VA_VLD surfaces. */
AVHWAccel ff_vp9_d3d11va_hwaccel = {
    /* identification */
    .name                 = "vp9_d3d11va",
    .id                   = AV_CODEC_ID_VP9,
    .type                 = AVMEDIA_TYPE_VIDEO,
    .pix_fmt              = AV_PIX_FMT_D3D11VA_VLD,

    /* per-frame private storage */
    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context),

    /* callbacks */
    .start_frame          = dxva2_vp9_start_frame,
    .decode_slice         = dxva2_vp9_decode_slice,
    .end_frame            = dxva2_vp9_end_frame,
};
#endif
336 |