blob: 93c103b31655fc47fee238aa9cc6a68904f013aa
1 | /* |
2 | * General DV muxer/demuxer |
3 | * Copyright (c) 2003 Roman Shaposhnik |
4 | * |
5 | * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth |
6 | * of DV technical info. |
7 | * |
8 | * Raw DV format |
9 | * Copyright (c) 2002 Fabrice Bellard |
10 | * |
11 | * 50 Mbps (DVCPRO50) support |
12 | * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com> |
13 | * |
14 | * This file is part of FFmpeg. |
15 | * |
16 | * FFmpeg is free software; you can redistribute it and/or |
17 | * modify it under the terms of the GNU Lesser General Public |
18 | * License as published by the Free Software Foundation; either |
19 | * version 2.1 of the License, or (at your option) any later version. |
20 | * |
21 | * FFmpeg is distributed in the hope that it will be useful, |
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | * Lesser General Public License for more details. |
25 | * |
26 | * You should have received a copy of the GNU Lesser General Public |
27 | * License along with FFmpeg; if not, write to the Free Software |
28 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
29 | */ |
30 | #include <time.h> |
31 | #include <stdarg.h> |
32 | |
33 | #include "avformat.h" |
34 | #include "internal.h" |
35 | #include "libavcodec/dv_profile.h" |
36 | #include "libavcodec/dv.h" |
37 | #include "dv.h" |
38 | #include "libavutil/avassert.h" |
39 | #include "libavutil/fifo.h" |
40 | #include "libavutil/mathematics.h" |
41 | #include "libavutil/intreadwrite.h" |
42 | #include "libavutil/opt.h" |
43 | #include "libavutil/timecode.h" |
44 | |
45 | #define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32-bit audio |
46 | |
47 | struct DVMuxContext { |
48 | AVClass *av_class; |
49 | const AVDVProfile* sys; /* current DV profile, e.g.: 525/60, 625/50 */ |
50 | int n_ast; /* number of stereo audio streams (up to 2) */ |
51 | AVStream *ast[2]; /* stereo audio streams */ |
52 | AVFifoBuffer *audio_data[2]; /* FIFO for storing excessive amounts of PCM */ |
53 | int frames; /* current frame number */ |
54 | int64_t start_time; /* recording start time */ |
55 | int has_audio; /* frame under construction has audio */ |
56 | int has_video; /* frame under construction has video */ |
57 | uint8_t frame_buf[DV_MAX_FRAME_SIZE]; /* frame under construction */ |
58 | AVTimecode tc; /* timecode context */ |
59 | }; |
60 | |
/*
 * Pack ids for the audio blocks of a frame, indexed as [DIF sequence][block].
 * Each entry is handed to dv_write_pack() as the pack id. Rows alternate
 * between two layouts: even rows carry 0x50..0x53 in blocks 3..6, odd rows
 * carry them in blocks 0..3; all remaining blocks hold 0xff.
 * NOTE(review): 0x50..0x53 presumably correspond to the dv_audio_* members of
 * enum dv_pack_type -- confirm against libavcodec/dv.h.
 */
#define DV_AAUX_ROW_EVEN { 0xff, 0xff, 0xff, 0x50, 0x51, 0x52, 0x53, 0xff, 0xff }
#define DV_AAUX_ROW_ODD  { 0x50, 0x51, 0x52, 0x53, 0xff, 0xff, 0xff, 0xff, 0xff }
static const int dv_aaux_packs_dist[12][9] = {
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
    DV_AAUX_ROW_EVEN, DV_AAUX_ROW_ODD,
};
#undef DV_AAUX_ROW_EVEN
#undef DV_AAUX_ROW_ODD
75 | |
76 | static int dv_audio_frame_size(const AVDVProfile* sys, int frame, int sample_rate) |
77 | { |
78 | if ((sys->time_base.den == 25 || sys->time_base.den == 50) && sys->time_base.num == 1) { |
79 | if (sample_rate == 32000) return 1280; |
80 | else if (sample_rate == 44100) return 1764; |
81 | else return 1920; |
82 | } |
83 | |
84 | av_assert0(sample_rate == 48000); |
85 | |
86 | return sys->audio_samples_dist[frame % (sizeof(sys->audio_samples_dist) / |
87 | sizeof(sys->audio_samples_dist[0]))]; |
88 | } |
89 | |
90 | static int dv_write_pack(enum dv_pack_type pack_id, DVMuxContext *c, uint8_t* buf, ...) |
91 | { |
92 | struct tm tc; |
93 | time_t ct; |
94 | uint32_t timecode; |
95 | va_list ap; |
96 | int audio_type = 0; |
97 | int channel; |
98 | |
99 | buf[0] = (uint8_t)pack_id; |
100 | switch (pack_id) { |
101 | case dv_timecode: |
102 | timecode = av_timecode_get_smpte_from_framenum(&c->tc, c->frames); |
103 | timecode |= 1<<23 | 1<<15 | 1<<7 | 1<<6; // biphase and binary group flags |
104 | AV_WB32(buf + 1, timecode); |
105 | break; |
106 | case dv_audio_source: /* AAUX source pack */ |
107 | va_start(ap, buf); |
108 | channel = va_arg(ap, int); |
109 | if (c->ast[channel]->codecpar->sample_rate == 44100) { |
110 | audio_type = 1; |
111 | } else if (c->ast[channel]->codecpar->sample_rate == 32000) |
112 | audio_type = 2; |
113 | buf[1] = (1 << 7) | /* locked mode -- SMPTE only supports locked mode */ |
114 | (1 << 6) | /* reserved -- always 1 */ |
115 | (dv_audio_frame_size(c->sys, c->frames, c->ast[channel]->codecpar->sample_rate) - |
116 | c->sys->audio_min_samples[audio_type]); |
117 | /* # of samples */ |
118 | buf[2] = (0 << 7) | /* multi-stereo */ |
119 | (0 << 5) | /* #of audio channels per block: 0 -- 1 channel */ |
120 | (0 << 4) | /* pair bit: 0 -- one pair of channels */ |
121 | !!va_arg(ap, int); /* audio mode */ |
122 | buf[3] = (1 << 7) | /* res */ |
123 | (1 << 6) | /* multi-language flag */ |
124 | (c->sys->dsf << 5) | /* system: 60fields/50fields */ |
125 | (c->sys->n_difchan & 2); /* definition: 0 -- 25Mbps, 2 -- 50Mbps */ |
126 | buf[4] = (1 << 7) | /* emphasis: 1 -- off */ |
127 | (0 << 6) | /* emphasis time constant: 0 -- reserved */ |
128 | (audio_type << 3) | /* frequency: 0 -- 48kHz, 1 -- 44,1kHz, 2 -- 32kHz */ |
129 | 0; /* quantization: 0 -- 16-bit linear, 1 -- 12-bit nonlinear */ |
130 | |
131 | va_end(ap); |
132 | break; |
133 | case dv_audio_control: |
134 | buf[1] = (0 << 6) | /* copy protection: 0 -- unrestricted */ |
135 | (1 << 4) | /* input source: 1 -- digital input */ |
136 | (3 << 2) | /* compression: 3 -- no information */ |
137 | 0; /* misc. info/SMPTE emphasis off */ |
138 | buf[2] = (1 << 7) | /* recording start point: 1 -- no */ |
139 | (1 << 6) | /* recording end point: 1 -- no */ |
140 | (1 << 3) | /* recording mode: 1 -- original */ |
141 | 7; |
142 | buf[3] = (1 << 7) | /* direction: 1 -- forward */ |
143 | (c->sys->pix_fmt == AV_PIX_FMT_YUV420P ? 0x20 : /* speed */ |
144 | c->sys->ltc_divisor * 4); |
145 | buf[4] = (1 << 7) | /* reserved -- always 1 */ |
146 | 0x7f; /* genre category */ |
147 | break; |
148 | case dv_audio_recdate: |
149 | case dv_video_recdate: /* VAUX recording date */ |
150 | ct = c->start_time + av_rescale_rnd(c->frames, c->sys->time_base.num, |
151 | c->sys->time_base.den, AV_ROUND_DOWN); |
152 | ff_brktimegm(ct, &tc); |
153 | buf[1] = 0xff; /* ds, tm, tens of time zone, units of time zone */ |
154 | /* 0xff is very likely to be "unknown" */ |
155 | buf[2] = (3 << 6) | /* reserved -- always 1 */ |
156 | ((tc.tm_mday / 10) << 4) | /* Tens of day */ |
157 | (tc.tm_mday % 10); /* Units of day */ |
158 | buf[3] = /* we set high 4 bits to 0, shouldn't we set them to week? */ |
159 | ((tc.tm_mon / 10) << 4) | /* Tens of month */ |
160 | (tc.tm_mon % 10); /* Units of month */ |
161 | buf[4] = (((tc.tm_year % 100) / 10) << 4) | /* Tens of year */ |
162 | (tc.tm_year % 10); /* Units of year */ |
163 | break; |
164 | case dv_audio_rectime: /* AAUX recording time */ |
165 | case dv_video_rectime: /* VAUX recording time */ |
166 | ct = c->start_time + av_rescale_rnd(c->frames, c->sys->time_base.num, |
167 | c->sys->time_base.den, AV_ROUND_DOWN); |
168 | ff_brktimegm(ct, &tc); |
169 | buf[1] = (3 << 6) | /* reserved -- always 1 */ |
170 | 0x3f; /* tens of frame, units of frame: 0x3f - "unknown" ? */ |
171 | buf[2] = (1 << 7) | /* reserved -- always 1 */ |
172 | ((tc.tm_sec / 10) << 4) | /* Tens of seconds */ |
173 | (tc.tm_sec % 10); /* Units of seconds */ |
174 | buf[3] = (1 << 7) | /* reserved -- always 1 */ |
175 | ((tc.tm_min / 10) << 4) | /* Tens of minutes */ |
176 | (tc.tm_min % 10); /* Units of minutes */ |
177 | buf[4] = (3 << 6) | /* reserved -- always 1 */ |
178 | ((tc.tm_hour / 10) << 4) | /* Tens of hours */ |
179 | (tc.tm_hour % 10); /* Units of hours */ |
180 | break; |
181 | default: |
182 | buf[1] = buf[2] = buf[3] = buf[4] = 0xff; |
183 | } |
184 | return 5; |
185 | } |
186 | |
187 | static void dv_inject_audio(DVMuxContext *c, int channel, uint8_t* frame_ptr) |
188 | { |
189 | int i, j, d, of, size; |
190 | size = 4 * dv_audio_frame_size(c->sys, c->frames, c->ast[channel]->codecpar->sample_rate); |
191 | frame_ptr += channel * c->sys->difseg_size * 150 * 80; |
192 | for (i = 0; i < c->sys->difseg_size; i++) { |
193 | frame_ptr += 6 * 80; /* skip DIF segment header */ |
194 | for (j = 0; j < 9; j++) { |
195 | dv_write_pack(dv_aaux_packs_dist[i][j], c, &frame_ptr[3], channel, i >= c->sys->difseg_size/2); |
196 | for (d = 8; d < 80; d+=2) { |
197 | of = c->sys->audio_shuffle[i][j] + (d - 8)/2 * c->sys->audio_stride; |
198 | if (of*2 >= size) |
199 | continue; |
200 | |
201 | frame_ptr[d] = *av_fifo_peek2(c->audio_data[channel], of*2+1); // FIXME: maybe we have to admit |
202 | frame_ptr[d+1] = *av_fifo_peek2(c->audio_data[channel], of*2); // that DV is a big-endian PCM |
203 | } |
204 | frame_ptr += 16 * 80; /* 15 Video DIFs + 1 Audio DIF */ |
205 | } |
206 | } |
207 | } |
208 | |
209 | static void dv_inject_metadata(DVMuxContext *c, uint8_t* frame) |
210 | { |
211 | int j, k; |
212 | uint8_t* buf; |
213 | |
214 | for (buf = frame; buf < frame + c->sys->frame_size; buf += 150 * 80) { |
215 | /* DV subcode: 2nd and 3d DIFs */ |
216 | for (j = 80; j < 80 * 3; j += 80) { |
217 | for (k = 6; k < 6 * 8; k += 8) |
218 | dv_write_pack(dv_timecode, c, &buf[j+k]); |
219 | |
220 | if (((long)(buf-frame)/(c->sys->frame_size/(c->sys->difseg_size*c->sys->n_difchan))%c->sys->difseg_size) > 5) { /* FIXME: is this really needed ? */ |
221 | dv_write_pack(dv_video_recdate, c, &buf[j+14]); |
222 | dv_write_pack(dv_video_rectime, c, &buf[j+22]); |
223 | dv_write_pack(dv_video_recdate, c, &buf[j+38]); |
224 | dv_write_pack(dv_video_rectime, c, &buf[j+46]); |
225 | } |
226 | } |
227 | |
228 | /* DV VAUX: 4th, 5th and 6th 3DIFs */ |
229 | for (j = 80*3 + 3; j < 80*6; j += 80) { |
230 | dv_write_pack(dv_video_recdate, c, &buf[j+5*2]); |
231 | dv_write_pack(dv_video_rectime, c, &buf[j+5*3]); |
232 | dv_write_pack(dv_video_recdate, c, &buf[j+5*11]); |
233 | dv_write_pack(dv_video_rectime, c, &buf[j+5*12]); |
234 | } |
235 | } |
236 | } |
237 | |
238 | /* |
239 | * The following 3 functions constitute our interface to the world |
240 | */ |
241 | |
242 | static int dv_assemble_frame(AVFormatContext *s, |
243 | DVMuxContext *c, AVStream* st, |
244 | uint8_t* data, int data_size, uint8_t** frame) |
245 | { |
246 | int i, reqasize; |
247 | |
248 | *frame = &c->frame_buf[0]; |
249 | |
250 | switch (st->codecpar->codec_type) { |
251 | case AVMEDIA_TYPE_VIDEO: |
252 | /* FIXME: we have to have more sensible approach than this one */ |
253 | if (c->has_video) |
254 | av_log(s, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient audio data or severe sync problem.\n", c->frames); |
255 | if (data_size != c->sys->frame_size) { |
256 | av_log(s, AV_LOG_ERROR, "Unexpected frame size, %d != %d\n", |
257 | data_size, c->sys->frame_size); |
258 | return AVERROR(ENOSYS); |
259 | } |
260 | |
261 | memcpy(*frame, data, c->sys->frame_size); |
262 | c->has_video = 1; |
263 | break; |
264 | case AVMEDIA_TYPE_AUDIO: |
265 | for (i = 0; i < c->n_ast && st != c->ast[i]; i++); |
266 | |
267 | /* FIXME: we have to have more sensible approach than this one */ |
268 | if (av_fifo_size(c->audio_data[i]) + data_size >= 100*MAX_AUDIO_FRAME_SIZE) |
269 | av_log(s, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient video data or severe sync problem.\n", c->frames); |
270 | av_fifo_generic_write(c->audio_data[i], data, data_size, NULL); |
271 | |
272 | reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, st->codecpar->sample_rate); |
273 | |
274 | /* Let us see if we've got enough audio for one DV frame. */ |
275 | c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i); |
276 | |
277 | break; |
278 | default: |
279 | break; |
280 | } |
281 | |
282 | /* Let us see if we have enough data to construct one DV frame. */ |
283 | if (c->has_video == 1 && c->has_audio + 1 == 1 << c->n_ast) { |
284 | dv_inject_metadata(c, *frame); |
285 | c->has_audio = 0; |
286 | for (i=0; i < c->n_ast; i++) { |
287 | dv_inject_audio(c, i, *frame); |
288 | reqasize = 4 * dv_audio_frame_size(c->sys, c->frames, c->ast[i]->codecpar->sample_rate); |
289 | av_fifo_drain(c->audio_data[i], reqasize); |
290 | c->has_audio |= ((reqasize <= av_fifo_size(c->audio_data[i])) << i); |
291 | } |
292 | |
293 | c->has_video = 0; |
294 | |
295 | c->frames++; |
296 | |
297 | return c->sys->frame_size; |
298 | } |
299 | |
300 | return 0; |
301 | } |
302 | |
303 | static DVMuxContext* dv_init_mux(AVFormatContext* s) |
304 | { |
305 | DVMuxContext *c = s->priv_data; |
306 | AVStream *vst = NULL; |
307 | int i; |
308 | |
309 | /* we support at most 1 video and 2 audio streams */ |
310 | if (s->nb_streams > 3) |
311 | return NULL; |
312 | |
313 | c->n_ast = 0; |
314 | c->ast[0] = c->ast[1] = NULL; |
315 | |
316 | /* We have to sort out where audio and where video stream is */ |
317 | for (i=0; i<s->nb_streams; i++) { |
318 | switch (s->streams[i]->codecpar->codec_type) { |
319 | case AVMEDIA_TYPE_VIDEO: |
320 | if (vst) return NULL; |
321 | vst = s->streams[i]; |
322 | break; |
323 | case AVMEDIA_TYPE_AUDIO: |
324 | if (c->n_ast > 1) return NULL; |
325 | c->ast[c->n_ast++] = s->streams[i]; |
326 | break; |
327 | default: |
328 | goto bail_out; |
329 | } |
330 | } |
331 | |
332 | /* Some checks -- DV format is very picky about its incoming streams */ |
333 | if (!vst || vst->codecpar->codec_id != AV_CODEC_ID_DVVIDEO) |
334 | goto bail_out; |
335 | for (i=0; i<c->n_ast; i++) { |
336 | if (c->ast[i]) { |
337 | if(c->ast[i]->codecpar->codec_id != AV_CODEC_ID_PCM_S16LE || |
338 | c->ast[i]->codecpar->channels != 2) |
339 | goto bail_out; |
340 | if (c->ast[i]->codecpar->sample_rate != 48000 && |
341 | c->ast[i]->codecpar->sample_rate != 44100 && |
342 | c->ast[i]->codecpar->sample_rate != 32000 ) |
343 | goto bail_out; |
344 | } |
345 | } |
346 | c->sys = av_dv_codec_profile2(vst->codecpar->width, vst->codecpar->height, |
347 | vst->codecpar->format, vst->time_base); |
348 | if (!c->sys) |
349 | goto bail_out; |
350 | |
351 | if ((c->sys->time_base.den != 25 && c->sys->time_base.den != 50) || c->sys->time_base.num != 1) { |
352 | if (c->ast[0] && c->ast[0]->codecpar->sample_rate != 48000) |
353 | goto bail_out; |
354 | if (c->ast[1] && c->ast[1]->codecpar->sample_rate != 48000) |
355 | goto bail_out; |
356 | } |
357 | |
358 | if ((c->n_ast > 1) && (c->sys->n_difchan < 2)) { |
359 | /* only 1 stereo pair is allowed in 25Mbps mode */ |
360 | goto bail_out; |
361 | } |
362 | |
363 | /* Ok, everything seems to be in working order */ |
364 | c->frames = 0; |
365 | c->has_audio = 0; |
366 | c->has_video = 0; |
367 | ff_parse_creation_time_metadata(s, &c->start_time, 1); |
368 | |
369 | for (i=0; i < c->n_ast; i++) { |
370 | if (c->ast[i] && !(c->audio_data[i]=av_fifo_alloc_array(100, MAX_AUDIO_FRAME_SIZE))) { |
371 | while (i > 0) { |
372 | i--; |
373 | av_fifo_freep(&c->audio_data[i]); |
374 | } |
375 | goto bail_out; |
376 | } |
377 | } |
378 | |
379 | return c; |
380 | |
381 | bail_out: |
382 | return NULL; |
383 | } |
384 | |
385 | static void dv_delete_mux(DVMuxContext *c) |
386 | { |
387 | int i; |
388 | for (i=0; i < c->n_ast; i++) |
389 | av_fifo_freep(&c->audio_data[i]); |
390 | } |
391 | |
392 | static int dv_write_header(AVFormatContext *s) |
393 | { |
394 | AVRational rate; |
395 | DVMuxContext *dvc = s->priv_data; |
396 | AVDictionaryEntry *tcr = av_dict_get(s->metadata, "timecode", NULL, 0); |
397 | |
398 | if (!dv_init_mux(s)) { |
399 | av_log(s, AV_LOG_ERROR, "Can't initialize DV format!\n" |
400 | "Make sure that you supply exactly two streams:\n" |
401 | " video: 25fps or 29.97fps, audio: 2ch/48|44|32kHz/PCM\n" |
402 | " (50Mbps allows an optional second audio stream)\n"); |
403 | return -1; |
404 | } |
405 | rate.num = dvc->sys->ltc_divisor; |
406 | rate.den = 1; |
407 | if (!tcr) { // no global timecode, look into the streams |
408 | int i; |
409 | for (i = 0; i < s->nb_streams; i++) { |
410 | tcr = av_dict_get(s->streams[i]->metadata, "timecode", NULL, 0); |
411 | if (tcr) |
412 | break; |
413 | } |
414 | } |
415 | if (tcr && av_timecode_init_from_string(&dvc->tc, rate, tcr->value, s) >= 0) |
416 | return 0; |
417 | return av_timecode_init(&dvc->tc, rate, 0, 0, s); |
418 | } |
419 | |
420 | static int dv_write_packet(struct AVFormatContext *s, AVPacket *pkt) |
421 | { |
422 | uint8_t* frame; |
423 | int fsize; |
424 | |
425 | fsize = dv_assemble_frame(s, s->priv_data, s->streams[pkt->stream_index], |
426 | pkt->data, pkt->size, &frame); |
427 | if (fsize > 0) { |
428 | avio_write(s->pb, frame, fsize); |
429 | } |
430 | return 0; |
431 | } |
432 | |
433 | /* |
434 | * We might end up with some extra A/V data without matching counterpart. |
435 | * E.g. video data without enough audio to write the complete frame. |
436 | * Currently we simply drop the last frame. I don't know whether this |
437 | * is the best strategy of all |
438 | */ |
439 | static int dv_write_trailer(struct AVFormatContext *s) |
440 | { |
441 | dv_delete_mux(s->priv_data); |
442 | return 0; |
443 | } |
444 | |
445 | AVOutputFormat ff_dv_muxer = { |
446 | .name = "dv", |
447 | .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"), |
448 | .extensions = "dv", |
449 | .priv_data_size = sizeof(DVMuxContext), |
450 | .audio_codec = AV_CODEC_ID_PCM_S16LE, |
451 | .video_codec = AV_CODEC_ID_DVVIDEO, |
452 | .write_header = dv_write_header, |
453 | .write_packet = dv_write_packet, |
454 | .write_trailer = dv_write_trailer, |
455 | }; |
456 |