blob: b974e5a6e69db9c7d60895c138bb0266582b7345
1 | /* |
2 | * Microsoft XMV demuxer |
3 | * Copyright (c) 2011 Sven Hesse <drmccoy@drmccoy.de> |
4 | * Copyright (c) 2011 Matthew Hoops <clone2727@gmail.com> |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | /** |
24 | * @file |
25 | * Microsoft XMV demuxer |
26 | */ |
27 | |
28 | #include <inttypes.h> |
29 | |
30 | #include "libavutil/intreadwrite.h" |
31 | |
32 | #include "avformat.h" |
33 | #include "internal.h" |
34 | #include "riff.h" |
35 | #include "libavutil/avassert.h" |
36 | |
/** The minimum size of an XMV header; xmv_probe() rejects smaller buffers. */
#define XMV_MIN_HEADER_SIZE 36

/** Audio flag: ADPCM'd 5.1 stream, front left / right channels */
#define XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT 1
/** Audio flag: ADPCM'd 5.1 stream, front center / low frequency channels */
#define XMV_AUDIO_ADPCM51_FRONTCENTERLOW 2
/** Audio flag: ADPCM'd 5.1 stream, rear left / right channels */
#define XMV_AUDIO_ADPCM51_REARLEFTRIGHT 4

/** Audio flag: Any of the ADPCM'd 5.1 stream flags. */
#define XMV_AUDIO_ADPCM51 (XMV_AUDIO_ADPCM51_FRONTLEFTRIGHT | \
                           XMV_AUDIO_ADPCM51_FRONTCENTERLOW | \
                           XMV_AUDIO_ADPCM51_REARLEFTRIGHT)

/** Bytes per compressed audio block for one channel; the block
 *  alignment of a track is this value times its channel count. */
#define XMV_BLOCK_ALIGN_SIZE 36
53 | |
/** A video packet within an XMV file. */
typedef struct XMVVideoPacket {
    int created;      ///< Non-zero once the video AVStream has been created.
    int stream_index; ///< The decoder stream index for this video packet.

    uint32_t data_size;   ///< The size of the remaining video data.
    uint64_t data_offset; ///< The offset of the video data within the file.

    uint32_t current_frame; ///< The current frame within this video packet.
    uint32_t frame_count;   ///< The amount of frames within this video packet.

    int     has_extradata; ///< Does the video packet contain extra data?
    uint8_t extradata[4];  ///< The extra data, rewritten as WMV2 extradata.

    int64_t last_pts; ///< PTS of the last video frame.
    int64_t pts;      ///< PTS of the most current video frame.
} XMVVideoPacket;
71 | |
/** An audio packet within an XMV file. */
typedef struct XMVAudioPacket {
    int created;      ///< Non-zero once the AVStream for this track has been created.
    int stream_index; ///< The decoder stream index for this audio packet.

    /* Stream format properties. */
    uint16_t compression;     ///< The type of compression.
    uint16_t channels;        ///< Number of channels.
    int32_t  sample_rate;     ///< Sampling rate.
    uint16_t bits_per_sample; ///< Bits per compressed sample.
    uint32_t bit_rate;        ///< Bits of compressed data per second.
    uint16_t flags;           ///< Flags (see the XMV_AUDIO_ADPCM51_* defines).
    unsigned block_align;     ///< Bytes per compressed block.
    uint16_t block_samples;   ///< Decompressed samples per compressed block.

    enum AVCodecID codec_id; ///< The codec ID of the compression scheme.

    uint32_t data_size;   ///< The size of the remaining audio data.
    uint64_t data_offset; ///< The offset of the audio data within the file.

    uint32_t frame_size; ///< Number of bytes to put into an audio frame.

    uint64_t block_count; ///< Running counter of audio blocks; used as the packet PTS.
} XMVAudioPacket;
96 | |
/** Context for demuxing an XMV file. */
typedef struct XMVDemuxContext {
    uint16_t audio_track_count; ///< Number of audio tracks in this file.

    uint32_t this_packet_size; ///< Size of the current packet.
    uint32_t next_packet_size; ///< Size of the next packet.

    uint64_t this_packet_offset; ///< Offset of the current packet.
    uint64_t next_packet_offset; ///< Offset of the next packet.

    /** The index of the stream currently being handled:
     *  0 is the video stream, n > 0 is audio track n - 1. */
    uint16_t current_stream;
    uint16_t stream_count;       ///< The number of streams in this file (audio tracks + 1).

    uint32_t video_duration; ///< Duration field of the file header; copied to AVStream.duration.
    uint32_t video_width;    ///< Width field of the file header.
    uint32_t video_height;   ///< Height field of the file header.

    XMVVideoPacket  video; ///< The video packet contained in each packet.
    XMVAudioPacket *audio; ///< The audio packets contained in each packet (audio_track_count entries).
} XMVDemuxContext;
117 | |
118 | static int xmv_probe(AVProbeData *p) |
119 | { |
120 | uint32_t file_version; |
121 | |
122 | if (p->buf_size < XMV_MIN_HEADER_SIZE) |
123 | return 0; |
124 | |
125 | file_version = AV_RL32(p->buf + 16); |
126 | if ((file_version == 0) || (file_version > 4)) |
127 | return 0; |
128 | |
129 | if (!memcmp(p->buf + 12, "xobX", 4)) |
130 | return AVPROBE_SCORE_MAX; |
131 | |
132 | return 0; |
133 | } |
134 | |
135 | static int xmv_read_close(AVFormatContext *s) |
136 | { |
137 | XMVDemuxContext *xmv = s->priv_data; |
138 | |
139 | av_freep(&xmv->audio); |
140 | |
141 | return 0; |
142 | } |
143 | |
144 | static int xmv_read_header(AVFormatContext *s) |
145 | { |
146 | XMVDemuxContext *xmv = s->priv_data; |
147 | AVIOContext *pb = s->pb; |
148 | |
149 | uint32_t file_version; |
150 | uint32_t this_packet_size; |
151 | uint16_t audio_track; |
152 | int ret; |
153 | |
154 | s->ctx_flags |= AVFMTCTX_NOHEADER; |
155 | |
156 | avio_skip(pb, 4); /* Next packet size */ |
157 | |
158 | this_packet_size = avio_rl32(pb); |
159 | |
160 | avio_skip(pb, 4); /* Max packet size */ |
161 | avio_skip(pb, 4); /* "xobX" */ |
162 | |
163 | file_version = avio_rl32(pb); |
164 | if ((file_version != 4) && (file_version != 2)) |
165 | avpriv_request_sample(s, "Uncommon version %"PRIu32"", file_version); |
166 | |
167 | /* Video tracks */ |
168 | |
169 | xmv->video_width = avio_rl32(pb); |
170 | xmv->video_height = avio_rl32(pb); |
171 | xmv->video_duration = avio_rl32(pb); |
172 | |
173 | /* Audio tracks */ |
174 | |
175 | xmv->audio_track_count = avio_rl16(pb); |
176 | |
177 | avio_skip(pb, 2); /* Unknown (padding?) */ |
178 | |
179 | xmv->audio = av_mallocz_array(xmv->audio_track_count, sizeof(XMVAudioPacket)); |
180 | if (!xmv->audio) { |
181 | ret = AVERROR(ENOMEM); |
182 | goto fail; |
183 | } |
184 | |
185 | for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { |
186 | XMVAudioPacket *packet = &xmv->audio[audio_track]; |
187 | |
188 | packet->compression = avio_rl16(pb); |
189 | packet->channels = avio_rl16(pb); |
190 | packet->sample_rate = avio_rl32(pb); |
191 | packet->bits_per_sample = avio_rl16(pb); |
192 | packet->flags = avio_rl16(pb); |
193 | |
194 | packet->bit_rate = packet->bits_per_sample * |
195 | packet->sample_rate * |
196 | packet->channels; |
197 | packet->block_align = XMV_BLOCK_ALIGN_SIZE * packet->channels; |
198 | packet->block_samples = 64; |
199 | packet->codec_id = ff_wav_codec_get_id(packet->compression, |
200 | packet->bits_per_sample); |
201 | |
202 | packet->stream_index = -1; |
203 | |
204 | packet->frame_size = 0; |
205 | packet->block_count = 0; |
206 | |
207 | /* TODO: ADPCM'd 5.1 sound is encoded in three separate streams. |
208 | * Those need to be interleaved to a proper 5.1 stream. */ |
209 | if (packet->flags & XMV_AUDIO_ADPCM51) |
210 | av_log(s, AV_LOG_WARNING, "Unsupported 5.1 ADPCM audio stream " |
211 | "(0x%04X)\n", packet->flags); |
212 | |
213 | if (!packet->channels || packet->sample_rate <= 0 || |
214 | packet->channels >= UINT16_MAX / XMV_BLOCK_ALIGN_SIZE) { |
215 | av_log(s, AV_LOG_ERROR, "Invalid parameters for audio track %"PRIu16".\n", |
216 | audio_track); |
217 | ret = AVERROR_INVALIDDATA; |
218 | goto fail; |
219 | } |
220 | } |
221 | |
222 | |
223 | /* Initialize the packet context */ |
224 | |
225 | xmv->next_packet_offset = avio_tell(pb); |
226 | xmv->next_packet_size = this_packet_size - xmv->next_packet_offset; |
227 | xmv->stream_count = xmv->audio_track_count + 1; |
228 | |
229 | return 0; |
230 | |
231 | fail: |
232 | xmv_read_close(s); |
233 | return ret; |
234 | } |
235 | |
236 | static void xmv_read_extradata(uint8_t *extradata, AVIOContext *pb) |
237 | { |
238 | /* Read the XMV extradata */ |
239 | |
240 | uint32_t data = avio_rl32(pb); |
241 | |
242 | int mspel_bit = !!(data & 0x01); |
243 | int loop_filter = !!(data & 0x02); |
244 | int abt_flag = !!(data & 0x04); |
245 | int j_type_bit = !!(data & 0x08); |
246 | int top_left_mv_flag = !!(data & 0x10); |
247 | int per_mb_rl_bit = !!(data & 0x20); |
248 | int slice_count = (data >> 6) & 7; |
249 | |
250 | /* Write it back as standard WMV2 extradata */ |
251 | |
252 | data = 0; |
253 | |
254 | data |= mspel_bit << 15; |
255 | data |= loop_filter << 14; |
256 | data |= abt_flag << 13; |
257 | data |= j_type_bit << 12; |
258 | data |= top_left_mv_flag << 11; |
259 | data |= per_mb_rl_bit << 10; |
260 | data |= slice_count << 7; |
261 | |
262 | AV_WB32(extradata, data); |
263 | } |
264 | |
265 | static int xmv_process_packet_header(AVFormatContext *s) |
266 | { |
267 | XMVDemuxContext *xmv = s->priv_data; |
268 | AVIOContext *pb = s->pb; |
269 | int ret; |
270 | |
271 | uint8_t data[8]; |
272 | uint16_t audio_track; |
273 | uint64_t data_offset; |
274 | |
275 | /* Next packet size */ |
276 | xmv->next_packet_size = avio_rl32(pb); |
277 | |
278 | /* Packet video header */ |
279 | |
280 | if (avio_read(pb, data, 8) != 8) |
281 | return AVERROR(EIO); |
282 | |
283 | xmv->video.data_size = AV_RL32(data) & 0x007FFFFF; |
284 | |
285 | xmv->video.current_frame = 0; |
286 | xmv->video.frame_count = (AV_RL32(data) >> 23) & 0xFF; |
287 | |
288 | xmv->video.has_extradata = (data[3] & 0x80) != 0; |
289 | |
290 | if (!xmv->video.created) { |
291 | AVStream *vst = avformat_new_stream(s, NULL); |
292 | if (!vst) |
293 | return AVERROR(ENOMEM); |
294 | |
295 | avpriv_set_pts_info(vst, 32, 1, 1000); |
296 | |
297 | vst->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; |
298 | vst->codecpar->codec_id = AV_CODEC_ID_WMV2; |
299 | vst->codecpar->codec_tag = MKBETAG('W', 'M', 'V', '2'); |
300 | vst->codecpar->width = xmv->video_width; |
301 | vst->codecpar->height = xmv->video_height; |
302 | |
303 | vst->duration = xmv->video_duration; |
304 | |
305 | xmv->video.stream_index = vst->index; |
306 | |
307 | xmv->video.created = 1; |
308 | } |
309 | |
310 | /* Adding the audio data sizes and the video data size keeps you 4 bytes |
311 | * short for every audio track. But as playing around with XMV files with |
312 | * ADPCM audio showed, taking the extra 4 bytes from the audio data gives |
313 | * you either completely distorted audio or click (when skipping the |
314 | * remaining 68 bytes of the ADPCM block). Subtracting 4 bytes for every |
315 | * audio track from the video data works at least for the audio. Probably |
316 | * some alignment thing? |
317 | * The video data has (always?) lots of padding, so it should work out... |
318 | */ |
319 | xmv->video.data_size -= xmv->audio_track_count * 4; |
320 | |
321 | xmv->current_stream = 0; |
322 | if (!xmv->video.frame_count) { |
323 | xmv->video.frame_count = 1; |
324 | xmv->current_stream = xmv->stream_count > 1; |
325 | } |
326 | |
327 | /* Packet audio header */ |
328 | |
329 | for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { |
330 | XMVAudioPacket *packet = &xmv->audio[audio_track]; |
331 | |
332 | if (avio_read(pb, data, 4) != 4) |
333 | return AVERROR(EIO); |
334 | |
335 | if (!packet->created) { |
336 | AVStream *ast = avformat_new_stream(s, NULL); |
337 | if (!ast) |
338 | return AVERROR(ENOMEM); |
339 | |
340 | ast->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
341 | ast->codecpar->codec_id = packet->codec_id; |
342 | ast->codecpar->codec_tag = packet->compression; |
343 | ast->codecpar->channels = packet->channels; |
344 | ast->codecpar->sample_rate = packet->sample_rate; |
345 | ast->codecpar->bits_per_coded_sample = packet->bits_per_sample; |
346 | ast->codecpar->bit_rate = packet->bit_rate; |
347 | ast->codecpar->block_align = 36 * packet->channels; |
348 | |
349 | avpriv_set_pts_info(ast, 32, packet->block_samples, packet->sample_rate); |
350 | |
351 | packet->stream_index = ast->index; |
352 | |
353 | ast->duration = xmv->video_duration; |
354 | |
355 | packet->created = 1; |
356 | } |
357 | |
358 | packet->data_size = AV_RL32(data) & 0x007FFFFF; |
359 | if ((packet->data_size == 0) && (audio_track != 0)) |
360 | /* This happens when I create an XMV with several identical audio |
361 | * streams. From the size calculations, duplicating the previous |
362 | * stream's size works out, but the track data itself is silent. |
363 | * Maybe this should also redirect the offset to the previous track? |
364 | */ |
365 | packet->data_size = xmv->audio[audio_track - 1].data_size; |
366 | |
367 | /* Carve up the audio data in frame_count slices */ |
368 | packet->frame_size = packet->data_size / xmv->video.frame_count; |
369 | packet->frame_size -= packet->frame_size % packet->block_align; |
370 | } |
371 | |
372 | /* Packet data offsets */ |
373 | |
374 | data_offset = avio_tell(pb); |
375 | |
376 | xmv->video.data_offset = data_offset; |
377 | data_offset += xmv->video.data_size; |
378 | |
379 | for (audio_track = 0; audio_track < xmv->audio_track_count; audio_track++) { |
380 | xmv->audio[audio_track].data_offset = data_offset; |
381 | data_offset += xmv->audio[audio_track].data_size; |
382 | } |
383 | |
384 | /* Video frames header */ |
385 | |
386 | /* Read new video extra data */ |
387 | if (xmv->video.data_size > 0) { |
388 | if (xmv->video.has_extradata) { |
389 | xmv_read_extradata(xmv->video.extradata, pb); |
390 | |
391 | xmv->video.data_size -= 4; |
392 | xmv->video.data_offset += 4; |
393 | |
394 | if (xmv->video.stream_index >= 0) { |
395 | AVStream *vst = s->streams[xmv->video.stream_index]; |
396 | |
397 | av_assert0(xmv->video.stream_index < s->nb_streams); |
398 | |
399 | if (vst->codecpar->extradata_size < 4) { |
400 | av_freep(&vst->codecpar->extradata); |
401 | |
402 | if ((ret = ff_alloc_extradata(vst->codecpar, 4)) < 0) |
403 | return ret; |
404 | } |
405 | |
406 | memcpy(vst->codecpar->extradata, xmv->video.extradata, 4); |
407 | } |
408 | } |
409 | } |
410 | |
411 | return 0; |
412 | } |
413 | |
414 | static int xmv_fetch_new_packet(AVFormatContext *s) |
415 | { |
416 | XMVDemuxContext *xmv = s->priv_data; |
417 | AVIOContext *pb = s->pb; |
418 | int result; |
419 | |
420 | if (xmv->this_packet_offset == xmv->next_packet_offset) |
421 | return AVERROR_EOF; |
422 | |
423 | /* Seek to it */ |
424 | xmv->this_packet_offset = xmv->next_packet_offset; |
425 | if (avio_seek(pb, xmv->this_packet_offset, SEEK_SET) != xmv->this_packet_offset) |
426 | return AVERROR(EIO); |
427 | |
428 | /* Update the size */ |
429 | xmv->this_packet_size = xmv->next_packet_size; |
430 | if (xmv->this_packet_size < (12 + xmv->audio_track_count * 4)) |
431 | return AVERROR(EIO); |
432 | |
433 | /* Process the header */ |
434 | result = xmv_process_packet_header(s); |
435 | if (result) |
436 | return result; |
437 | |
438 | /* Update the offset */ |
439 | xmv->next_packet_offset = xmv->this_packet_offset + xmv->this_packet_size; |
440 | |
441 | return 0; |
442 | } |
443 | |
444 | static int xmv_fetch_audio_packet(AVFormatContext *s, |
445 | AVPacket *pkt, uint32_t stream) |
446 | { |
447 | XMVDemuxContext *xmv = s->priv_data; |
448 | AVIOContext *pb = s->pb; |
449 | XMVAudioPacket *audio = &xmv->audio[stream]; |
450 | |
451 | uint32_t data_size; |
452 | uint32_t block_count; |
453 | int result; |
454 | |
455 | /* Seek to it */ |
456 | if (avio_seek(pb, audio->data_offset, SEEK_SET) != audio->data_offset) |
457 | return AVERROR(EIO); |
458 | |
459 | if ((xmv->video.current_frame + 1) < xmv->video.frame_count) |
460 | /* Not the last frame, get at most frame_size bytes. */ |
461 | data_size = FFMIN(audio->frame_size, audio->data_size); |
462 | else |
463 | /* Last frame, get the rest. */ |
464 | data_size = audio->data_size; |
465 | |
466 | /* Read the packet */ |
467 | result = av_get_packet(pb, pkt, data_size); |
468 | if (result <= 0) |
469 | return result; |
470 | |
471 | pkt->stream_index = audio->stream_index; |
472 | |
473 | /* Calculate the PTS */ |
474 | |
475 | block_count = data_size / audio->block_align; |
476 | |
477 | pkt->duration = block_count; |
478 | pkt->pts = audio->block_count; |
479 | pkt->dts = AV_NOPTS_VALUE; |
480 | |
481 | audio->block_count += block_count; |
482 | |
483 | /* Advance offset */ |
484 | audio->data_size -= data_size; |
485 | audio->data_offset += data_size; |
486 | |
487 | return 0; |
488 | } |
489 | |
490 | static int xmv_fetch_video_packet(AVFormatContext *s, |
491 | AVPacket *pkt) |
492 | { |
493 | XMVDemuxContext *xmv = s->priv_data; |
494 | AVIOContext *pb = s->pb; |
495 | XMVVideoPacket *video = &xmv->video; |
496 | |
497 | int result; |
498 | uint32_t frame_header; |
499 | uint32_t frame_size, frame_timestamp; |
500 | uint8_t *data, *end; |
501 | |
502 | /* Seek to it */ |
503 | if (avio_seek(pb, video->data_offset, SEEK_SET) != video->data_offset) |
504 | return AVERROR(EIO); |
505 | |
506 | /* Read the frame header */ |
507 | frame_header = avio_rl32(pb); |
508 | |
509 | frame_size = (frame_header & 0x1FFFF) * 4 + 4; |
510 | frame_timestamp = (frame_header >> 17); |
511 | |
512 | if ((frame_size + 4) > video->data_size) |
513 | return AVERROR(EIO); |
514 | |
515 | /* Get the packet data */ |
516 | result = av_get_packet(pb, pkt, frame_size); |
517 | if (result != frame_size) |
518 | return result; |
519 | |
520 | /* Contrary to normal WMV2 video, the bit stream in XMV's |
521 | * WMV2 is little-endian. |
522 | * TODO: This manual swap is of course suboptimal. |
523 | */ |
524 | for (data = pkt->data, end = pkt->data + frame_size; data < end; data += 4) |
525 | AV_WB32(data, AV_RL32(data)); |
526 | |
527 | pkt->stream_index = video->stream_index; |
528 | |
529 | /* Calculate the PTS */ |
530 | |
531 | video->last_pts = frame_timestamp + video->pts; |
532 | |
533 | pkt->duration = 0; |
534 | pkt->pts = video->last_pts; |
535 | pkt->dts = AV_NOPTS_VALUE; |
536 | |
537 | video->pts += frame_timestamp; |
538 | |
539 | /* Keyframe? */ |
540 | pkt->flags = (pkt->data[0] & 0x80) ? 0 : AV_PKT_FLAG_KEY; |
541 | |
542 | /* Advance offset */ |
543 | video->data_size -= frame_size + 4; |
544 | video->data_offset += frame_size + 4; |
545 | |
546 | return 0; |
547 | } |
548 | |
549 | static int xmv_read_packet(AVFormatContext *s, |
550 | AVPacket *pkt) |
551 | { |
552 | XMVDemuxContext *xmv = s->priv_data; |
553 | int result; |
554 | |
555 | if (xmv->video.current_frame == xmv->video.frame_count) { |
556 | /* No frames left in this packet, so we fetch a new one */ |
557 | |
558 | result = xmv_fetch_new_packet(s); |
559 | if (result) |
560 | return result; |
561 | } |
562 | |
563 | if (xmv->current_stream == 0) { |
564 | /* Fetch a video frame */ |
565 | |
566 | result = xmv_fetch_video_packet(s, pkt); |
567 | } else { |
568 | /* Fetch an audio frame */ |
569 | |
570 | result = xmv_fetch_audio_packet(s, pkt, xmv->current_stream - 1); |
571 | } |
572 | if (result) { |
573 | xmv->current_stream = 0; |
574 | xmv->video.current_frame = xmv->video.frame_count; |
575 | return result; |
576 | } |
577 | |
578 | |
579 | /* Increase our counters */ |
580 | if (++xmv->current_stream >= xmv->stream_count) { |
581 | xmv->current_stream = 0; |
582 | xmv->video.current_frame += 1; |
583 | } |
584 | |
585 | return 0; |
586 | } |
587 | |
588 | AVInputFormat ff_xmv_demuxer = { |
589 | .name = "xmv", |
590 | .long_name = NULL_IF_CONFIG_SMALL("Microsoft XMV"), |
591 | .extensions = "xmv", |
592 | .priv_data_size = sizeof(XMVDemuxContext), |
593 | .read_probe = xmv_probe, |
594 | .read_header = xmv_read_header, |
595 | .read_packet = xmv_read_packet, |
596 | .read_close = xmv_read_close, |
597 | }; |
598 |