blob: 4a7f934d6a6059a65a7994d561919b80b47ab9c1
1 | /* |
2 | * RTP parser for VP9 payload format (draft version 02) - experimental |
3 | * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include "libavutil/intreadwrite.h" |
23 | |
24 | #include "avio_internal.h" |
25 | #include "rtpdec_formats.h" |
26 | |
/* Size in bytes of the required first octet of the VP9 payload descriptor. */
#define RTP_VP9_DESC_REQUIRED_SIZE 1
28 | |
/* Depacketizer state kept between calls to vp9_handle_packet(). */
struct PayloadContext {
    /* dynamic buffer collecting the fragments of the frame being
     * reassembled; tested against NULL to decide whether a frame has
     * been started */
    AVIOContext *buf;
    /* RTP timestamp of the packet that started the buffered frame */
    uint32_t timestamp;
};
33 | |
34 | static av_cold int vp9_init(AVFormatContext *ctx, int st_index, |
35 | PayloadContext *data) |
36 | { |
37 | av_log(ctx, AV_LOG_WARNING, |
38 | "RTP/VP9 support is still experimental\n"); |
39 | |
40 | return 0; |
41 | } |
42 | |
43 | static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, |
44 | AVStream *st, AVPacket *pkt, uint32_t *timestamp, |
45 | const uint8_t *buf, int len, uint16_t seq, |
46 | int flags) |
47 | { |
48 | int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data; |
49 | av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame; |
50 | av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; |
51 | int ref_fields = 0, has_ref_field_ext_pic_id = 0; |
52 | int first_fragment, last_fragment; |
53 | int rtp_m; |
54 | int res = 0; |
55 | |
56 | /* drop data of previous packets in case of non-continuous (lossy) packet stream */ |
57 | if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) |
58 | ffio_free_dyn_buf(&rtp_vp9_ctx->buf); |
59 | |
60 | /* sanity check for size of input packet: 1 byte payload at least */ |
61 | if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { |
62 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); |
63 | return AVERROR_INVALIDDATA; |
64 | } |
65 | |
66 | /* |
67 | * decode the required VP9 payload descriptor according to section 4.2 of the spec.: |
68 | * |
69 | * 0 1 2 3 4 5 6 7 |
70 | * +-+-+-+-+-+-+-+-+ |
71 | * |I|P|L|F|B|E|V|-| (REQUIRED) |
72 | * +-+-+-+-+-+-+-+-+ |
73 | * |
74 | * I: PictureID present |
75 | * P: Inter-picture predicted layer frame |
76 | * L: Layer indices present |
77 | * F: Flexible mode |
78 | * B: Start of VP9 frame |
79 | * E: End of picture |
80 | * V: Scalability Structure (SS) present |
81 | */ |
82 | has_pic_id = !!(buf[0] & 0x80); |
83 | inter_picture_layer_frame = !!(buf[0] & 0x40); |
84 | has_layer_idc = !!(buf[0] & 0x20); |
85 | has_ref_idc = !!(buf[0] & 0x10); |
86 | first_fragment = !!(buf[0] & 0x08); |
87 | last_fragment = !!(buf[0] & 0x04); |
88 | has_ss_data = !!(buf[0] & 0x02); |
89 | |
90 | rtp_m = !!(flags & RTP_FLAG_MARKER); |
91 | |
92 | /* sanity check for markers: B should always be equal to the RTP M marker */ |
93 | if (last_fragment != rtp_m) { |
94 | av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker (%d != %d)\n", last_fragment, rtp_m); |
95 | return AVERROR_INVALIDDATA; |
96 | } |
97 | |
98 | /* pass the extensions field */ |
99 | buf += RTP_VP9_DESC_REQUIRED_SIZE; |
100 | len -= RTP_VP9_DESC_REQUIRED_SIZE; |
101 | |
102 | /* |
103 | * decode the 1-byte/2-byte picture ID: |
104 | * |
105 | * 0 1 2 3 4 5 6 7 |
106 | * +-+-+-+-+-+-+-+-+ |
107 | * I: |M|PICTURE ID | (RECOMMENDED) |
108 | * +-+-+-+-+-+-+-+-+ |
109 | * M: | EXTENDED PID | (RECOMMENDED) |
110 | * +-+-+-+-+-+-+-+-+ |
111 | * |
112 | * M: The most significant bit of the first octet is an extension flag. |
113 | * PictureID: 8 or 16 bits including the M bit. |
114 | */ |
115 | if (has_pic_id) { |
116 | /* check for 1-byte or 2-byte picture index */ |
117 | if (buf[0] & 0x80) { |
118 | if (len < 2) { |
119 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
120 | return AVERROR_INVALIDDATA; |
121 | } |
122 | pic_id = AV_RB16(buf) & 0x7fff; |
123 | buf += 2; |
124 | len -= 2; |
125 | } else { |
126 | pic_id = buf[0] & 0x7f; |
127 | buf++; |
128 | len--; |
129 | } |
130 | } |
131 | |
132 | /* |
133 | * decode layer indices |
134 | * |
135 | * 0 1 2 3 4 5 6 7 |
136 | * +-+-+-+-+-+-+-+-+ |
137 | * L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) |
138 | * +-+-+-+-+-+-+-+-+ |
139 | * |
140 | * T, S and Q are 2-bit indices for temporal, spatial, and quality layers. |
141 | * If "F" is set in the initial octet, R is 2 bits representing the number |
142 | * of reference fields this frame refers to. |
143 | */ |
144 | if (has_layer_idc) { |
145 | if (len < 1) { |
146 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
147 | return AVERROR_INVALIDDATA; |
148 | } |
149 | layer_temporal = buf[0] & 0xC0; |
150 | layer_spatial = buf[0] & 0x30; |
151 | layer_quality = buf[0] & 0x0C; |
152 | if (has_ref_idc) { |
153 | ref_fields = buf[0] & 0x03; |
154 | if (ref_fields) |
155 | non_key_frame = 1; |
156 | } |
157 | buf++; |
158 | len--; |
159 | } |
160 | |
161 | /* |
162 | * decode the reference fields |
163 | * |
164 | * 0 1 2 3 4 5 6 7 |
165 | * +-+-+-+-+-+-+-+-+ -\ |
166 | * F: | PID |X| RS| RQ| (OPTIONAL) . |
167 | * +-+-+-+-+-+-+-+-+ . - R times |
168 | * X: | EXTENDED PID | (OPTIONAL) . |
169 | * +-+-+-+-+-+-+-+-+ -/ |
170 | * |
171 | * PID: The relative Picture ID referred to by this frame. |
172 | * RS and RQ: The spatial and quality layer IDs. |
173 | * X: 1 if this layer index has an extended relative Picture ID. |
174 | */ |
175 | if (has_ref_idc) { |
176 | while (ref_fields) { |
177 | if (len < 1) { |
178 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
179 | return AVERROR_INVALIDDATA; |
180 | } |
181 | |
182 | has_ref_field_ext_pic_id = buf[0] & 0x10; |
183 | |
184 | /* pass ref. field */ |
185 | if (has_ref_field_ext_pic_id) { |
186 | if (len < 2) { |
187 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
188 | return AVERROR_INVALIDDATA; |
189 | } |
190 | |
191 | /* ignore ref. data */ |
192 | |
193 | buf += 2; |
194 | len -= 2; |
195 | } else { |
196 | |
197 | /* ignore ref. data */ |
198 | |
199 | buf++; |
200 | len--; |
201 | } |
202 | ref_fields--; |
203 | } |
204 | } |
205 | |
206 | /* |
207 | * decode the scalability structure (SS) |
208 | * |
209 | * 0 1 2 3 4 5 6 7 |
210 | * +-+-+-+-+-+-+-+-+ |
211 | * V: | PATTERN LENGTH| |
212 | * +-+-+-+-+-+-+-+-+ -\ |
213 | * | T | S | Q | R | (OPTIONAL) . |
214 | * +-+-+-+-+-+-+-+-+ -\ . |
215 | * | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times |
216 | * +-+-+-+-+-+-+-+-+ . - R times . |
217 | * X: | EXTENDED PID | (OPTIONAL) . . |
218 | * +-+-+-+-+-+-+-+-+ -/ -/ |
219 | * |
220 | * PID: The relative Picture ID referred to by this frame. |
221 | * RS and RQ: The spatial and quality layer IDs. |
222 | * X: 1 if this layer index has an extended relative Picture ID. |
223 | */ |
224 | if (has_ss_data) { |
225 | int n_s, y, g, i; |
226 | if (len < 1) { |
227 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
228 | return AVERROR_INVALIDDATA; |
229 | } |
230 | n_s = buf[0] >> 5; |
231 | y = !!(buf[0] & 0x10); |
232 | g = !!(buf[0] & 0x08); |
233 | buf++; |
234 | len--; |
235 | if (n_s > 0) { |
236 | avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers"); |
237 | return AVERROR_PATCHWELCOME; |
238 | } |
239 | if (y) { |
240 | if (len < 4 * (n_s + 1)) { |
241 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
242 | return AVERROR_INVALIDDATA; |
243 | } |
244 | for (i = 0; i < n_s + 1; i++) { |
245 | av_unused int w, h; |
246 | w = AV_RB16(buf); |
247 | h = AV_RB16(buf + 2); |
248 | buf += 4; |
249 | len -= 4; |
250 | } |
251 | } |
252 | if (g) { |
253 | int n_g; |
254 | if (len < 1) { |
255 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
256 | return AVERROR_INVALIDDATA; |
257 | } |
258 | n_g = buf[0]; |
259 | buf++; |
260 | len--; |
261 | for (i = 0; i < n_g; i++) { |
262 | av_unused int t, u, r, j; |
263 | if (len < 1) { |
264 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
265 | return AVERROR_INVALIDDATA; |
266 | } |
267 | t = buf[0] >> 5; |
268 | u = !!(buf[0] & 0x10); |
269 | r = (buf[0] >> 2) & 0x03; |
270 | buf++; |
271 | len--; |
272 | if (len < r) { |
273 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
274 | return AVERROR_INVALIDDATA; |
275 | } |
276 | for (j = 0; j < r; j++) { |
277 | av_unused int p_diff = buf[0]; |
278 | buf++; |
279 | len--; |
280 | } |
281 | } |
282 | } |
283 | } |
284 | |
285 | /* |
286 | * decode the VP9 payload header |
287 | * |
288 | * spec. is tbd |
289 | */ |
290 | //XXX: implement when specified |
291 | |
292 | /* sanity check: 1 byte payload as minimum */ |
293 | if (len < 1) { |
294 | av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); |
295 | return AVERROR_INVALIDDATA; |
296 | } |
297 | |
298 | /* start frame buffering with new dynamic buffer */ |
299 | if (!rtp_vp9_ctx->buf) { |
300 | /* sanity check: a new frame should have started */ |
301 | if (first_fragment) { |
302 | res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); |
303 | if (res < 0) |
304 | return res; |
305 | /* update the timestamp in the frame packet with the one from the RTP packet */ |
306 | rtp_vp9_ctx->timestamp = *timestamp; |
307 | } else { |
308 | /* frame not started yet, need more packets */ |
309 | return AVERROR(EAGAIN); |
310 | } |
311 | } |
312 | |
313 | /* write the fragment to the dyn. buffer */ |
314 | avio_write(rtp_vp9_ctx->buf, buf, len); |
315 | |
316 | /* do we need more fragments? */ |
317 | if (!last_fragment) |
318 | return AVERROR(EAGAIN); |
319 | |
320 | /* close frame buffering and create resulting A/V packet */ |
321 | res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); |
322 | if (res < 0) |
323 | return res; |
324 | |
325 | return 0; |
326 | } |
327 | |
328 | static void vp9_close_context(PayloadContext *vp9) |
329 | { |
330 | ffio_free_dyn_buf(&vp9->buf); |
331 | } |
332 | |
333 | RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { |
334 | .enc_name = "VP9", |
335 | .codec_type = AVMEDIA_TYPE_VIDEO, |
336 | .codec_id = AV_CODEC_ID_VP9, |
337 | .priv_data_size = sizeof(PayloadContext), |
338 | .init = vp9_init, |
339 | .close = vp9_close_context, |
340 | .parse_packet = vp9_handle_packet |
341 | }; |
342 |