blob: 19939d3a859fa66f62aed9f7cac1cd6cbc36b82d
1 | /* |
2 | * Core Audio Format demuxer |
3 | * Copyright (c) 2007 Justin Ruggles |
4 | * Copyright (c) 2009 Peter Ross |
5 | * |
6 | * This file is part of FFmpeg. |
7 | * |
8 | * FFmpeg is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Lesser General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2.1 of the License, or (at your option) any later version. |
12 | * |
13 | * FFmpeg is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Lesser General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Lesser General Public |
19 | * License along with FFmpeg; if not, write to the Free Software |
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | */ |
22 | |
23 | /** |
24 | * @file |
25 | * Core Audio Format demuxer |
26 | */ |
27 | |
28 | #include <inttypes.h> |
29 | |
30 | #include "avformat.h" |
31 | #include "internal.h" |
32 | #include "isom.h" |
33 | #include "mov_chan.h" |
34 | #include "libavutil/intreadwrite.h" |
35 | #include "libavutil/intfloat.h" |
36 | #include "libavutil/dict.h" |
37 | #include "caf.h" |
38 | |
/**
 * Private demuxer state for the single audio stream of a CAF file.
 */
typedef struct CafContext {
    /* Packet geometry read from the audio description chunk. */
    int bytes_per_packet;   /**< bytes in a packet, or 0 if variable */
    int frames_per_packet;  /**< frames in a packet, or 0 if variable */

    /* Sum of all packet sizes, filled in from the packet table chunk. */
    int64_t num_bytes;      /**< total number of bytes in stream */

    /* Read cursor, advanced by read_packet() and reset by read_seek(). */
    int64_t packet_cnt;     /**< packet counter */
    int64_t frame_cnt;      /**< frame counter */

    /* Location of the audio payload inside the file. */
    int64_t data_start;     /**< data start position, in bytes */
    int64_t data_size;      /**< raw data size, in bytes */
} CafContext;
50 | |
51 | static int probe(AVProbeData *p) |
52 | { |
53 | if (AV_RB32(p->buf) == MKBETAG('c','a','f','f') && AV_RB16(&p->buf[4]) == 1) |
54 | return AVPROBE_SCORE_MAX; |
55 | return 0; |
56 | } |
57 | |
58 | /** Read audio description chunk */ |
59 | static int read_desc_chunk(AVFormatContext *s) |
60 | { |
61 | AVIOContext *pb = s->pb; |
62 | CafContext *caf = s->priv_data; |
63 | AVStream *st; |
64 | int flags; |
65 | |
66 | /* new audio stream */ |
67 | st = avformat_new_stream(s, NULL); |
68 | if (!st) |
69 | return AVERROR(ENOMEM); |
70 | |
71 | /* parse format description */ |
72 | st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; |
73 | st->codecpar->sample_rate = av_int2double(avio_rb64(pb)); |
74 | st->codecpar->codec_tag = avio_rl32(pb); |
75 | flags = avio_rb32(pb); |
76 | caf->bytes_per_packet = avio_rb32(pb); |
77 | st->codecpar->block_align = caf->bytes_per_packet; |
78 | caf->frames_per_packet = avio_rb32(pb); |
79 | st->codecpar->channels = avio_rb32(pb); |
80 | st->codecpar->bits_per_coded_sample = avio_rb32(pb); |
81 | |
82 | /* calculate bit rate for constant size packets */ |
83 | if (caf->frames_per_packet > 0 && caf->bytes_per_packet > 0) { |
84 | st->codecpar->bit_rate = (uint64_t)st->codecpar->sample_rate * (uint64_t)caf->bytes_per_packet * 8 |
85 | / (uint64_t)caf->frames_per_packet; |
86 | } else { |
87 | st->codecpar->bit_rate = 0; |
88 | } |
89 | |
90 | /* determine codec */ |
91 | if (st->codecpar->codec_tag == MKTAG('l','p','c','m')) |
92 | st->codecpar->codec_id = ff_mov_get_lpcm_codec_id(st->codecpar->bits_per_coded_sample, (flags ^ 0x2) | 0x4); |
93 | else |
94 | st->codecpar->codec_id = ff_codec_get_id(ff_codec_caf_tags, st->codecpar->codec_tag); |
95 | return 0; |
96 | } |
97 | |
98 | /** Read magic cookie chunk */ |
99 | static int read_kuki_chunk(AVFormatContext *s, int64_t size) |
100 | { |
101 | AVIOContext *pb = s->pb; |
102 | AVStream *st = s->streams[0]; |
103 | |
104 | if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) |
105 | return -1; |
106 | |
107 | if (st->codecpar->codec_id == AV_CODEC_ID_AAC) { |
108 | /* The magic cookie format for AAC is an mp4 esds atom. |
109 | The lavc AAC decoder requires the data from the codec specific |
110 | description as extradata input. */ |
111 | int strt, skip; |
112 | |
113 | strt = avio_tell(pb); |
114 | ff_mov_read_esds(s, pb); |
115 | skip = size - (avio_tell(pb) - strt); |
116 | if (skip < 0 || !st->codecpar->extradata || |
117 | st->codecpar->codec_id != AV_CODEC_ID_AAC) { |
118 | av_log(s, AV_LOG_ERROR, "invalid AAC magic cookie\n"); |
119 | return AVERROR_INVALIDDATA; |
120 | } |
121 | avio_skip(pb, skip); |
122 | } else if (st->codecpar->codec_id == AV_CODEC_ID_ALAC) { |
123 | #define ALAC_PREAMBLE 12 |
124 | #define ALAC_HEADER 36 |
125 | #define ALAC_NEW_KUKI 24 |
126 | uint8_t preamble[12]; |
127 | if (size < ALAC_NEW_KUKI) { |
128 | av_log(s, AV_LOG_ERROR, "invalid ALAC magic cookie\n"); |
129 | avio_skip(pb, size); |
130 | return AVERROR_INVALIDDATA; |
131 | } |
132 | if (avio_read(pb, preamble, ALAC_PREAMBLE) != ALAC_PREAMBLE) { |
133 | av_log(s, AV_LOG_ERROR, "failed to read preamble\n"); |
134 | return AVERROR_INVALIDDATA; |
135 | } |
136 | |
137 | av_freep(&st->codecpar->extradata); |
138 | if (ff_alloc_extradata(st->codecpar, ALAC_HEADER)) |
139 | return AVERROR(ENOMEM); |
140 | |
141 | /* For the old style cookie, we skip 12 bytes, then read 36 bytes. |
142 | * The new style cookie only contains the last 24 bytes of what was |
143 | * 36 bytes in the old style cookie, so we fabricate the first 12 bytes |
144 | * in that case to maintain compatibility. */ |
145 | if (!memcmp(&preamble[4], "frmaalac", 8)) { |
146 | if (size < ALAC_PREAMBLE + ALAC_HEADER) { |
147 | av_log(s, AV_LOG_ERROR, "invalid ALAC magic cookie\n"); |
148 | av_freep(&st->codecpar->extradata); |
149 | return AVERROR_INVALIDDATA; |
150 | } |
151 | if (avio_read(pb, st->codecpar->extradata, ALAC_HEADER) != ALAC_HEADER) { |
152 | av_log(s, AV_LOG_ERROR, "failed to read kuki header\n"); |
153 | av_freep(&st->codecpar->extradata); |
154 | return AVERROR_INVALIDDATA; |
155 | } |
156 | avio_skip(pb, size - ALAC_PREAMBLE - ALAC_HEADER); |
157 | } else { |
158 | AV_WB32(st->codecpar->extradata, 36); |
159 | memcpy(&st->codecpar->extradata[4], "alac", 4); |
160 | AV_WB32(&st->codecpar->extradata[8], 0); |
161 | memcpy(&st->codecpar->extradata[12], preamble, 12); |
162 | if (avio_read(pb, &st->codecpar->extradata[24], ALAC_NEW_KUKI - 12) != ALAC_NEW_KUKI - 12) { |
163 | av_log(s, AV_LOG_ERROR, "failed to read new kuki header\n"); |
164 | av_freep(&st->codecpar->extradata); |
165 | return AVERROR_INVALIDDATA; |
166 | } |
167 | avio_skip(pb, size - ALAC_NEW_KUKI); |
168 | } |
169 | } else { |
170 | av_freep(&st->codecpar->extradata); |
171 | if (ff_get_extradata(s, st->codecpar, pb, size) < 0) |
172 | return AVERROR(ENOMEM); |
173 | } |
174 | |
175 | return 0; |
176 | } |
177 | |
178 | /** Read packet table chunk */ |
179 | static int read_pakt_chunk(AVFormatContext *s, int64_t size) |
180 | { |
181 | AVIOContext *pb = s->pb; |
182 | AVStream *st = s->streams[0]; |
183 | CafContext *caf = s->priv_data; |
184 | int64_t pos = 0, ccount, num_packets; |
185 | int i; |
186 | |
187 | ccount = avio_tell(pb); |
188 | |
189 | num_packets = avio_rb64(pb); |
190 | if (num_packets < 0 || INT32_MAX / sizeof(AVIndexEntry) < num_packets) |
191 | return AVERROR_INVALIDDATA; |
192 | |
193 | st->nb_frames = avio_rb64(pb); /* valid frames */ |
194 | st->nb_frames += avio_rb32(pb); /* priming frames */ |
195 | st->nb_frames += avio_rb32(pb); /* remainder frames */ |
196 | |
197 | st->duration = 0; |
198 | for (i = 0; i < num_packets; i++) { |
199 | av_add_index_entry(s->streams[0], pos, st->duration, 0, 0, AVINDEX_KEYFRAME); |
200 | pos += caf->bytes_per_packet ? caf->bytes_per_packet : ff_mp4_read_descr_len(pb); |
201 | st->duration += caf->frames_per_packet ? caf->frames_per_packet : ff_mp4_read_descr_len(pb); |
202 | } |
203 | |
204 | if (avio_tell(pb) - ccount > size) { |
205 | av_log(s, AV_LOG_ERROR, "error reading packet table\n"); |
206 | return AVERROR_INVALIDDATA; |
207 | } |
208 | avio_skip(pb, ccount + size - avio_tell(pb)); |
209 | |
210 | caf->num_bytes = pos; |
211 | return 0; |
212 | } |
213 | |
214 | /** Read information chunk */ |
215 | static void read_info_chunk(AVFormatContext *s, int64_t size) |
216 | { |
217 | AVIOContext *pb = s->pb; |
218 | unsigned int i; |
219 | unsigned int nb_entries = avio_rb32(pb); |
220 | for (i = 0; i < nb_entries && !avio_feof(pb); i++) { |
221 | char key[32]; |
222 | char value[1024]; |
223 | avio_get_str(pb, INT_MAX, key, sizeof(key)); |
224 | avio_get_str(pb, INT_MAX, value, sizeof(value)); |
225 | av_dict_set(&s->metadata, key, value, 0); |
226 | } |
227 | } |
228 | |
229 | static int read_header(AVFormatContext *s) |
230 | { |
231 | AVIOContext *pb = s->pb; |
232 | CafContext *caf = s->priv_data; |
233 | AVStream *st; |
234 | uint32_t tag = 0; |
235 | int found_data, ret; |
236 | int64_t size, pos; |
237 | |
238 | avio_skip(pb, 8); /* magic, version, file flags */ |
239 | |
240 | /* audio description chunk */ |
241 | if (avio_rb32(pb) != MKBETAG('d','e','s','c')) { |
242 | av_log(s, AV_LOG_ERROR, "desc chunk not present\n"); |
243 | return AVERROR_INVALIDDATA; |
244 | } |
245 | size = avio_rb64(pb); |
246 | if (size != 32) |
247 | return AVERROR_INVALIDDATA; |
248 | |
249 | ret = read_desc_chunk(s); |
250 | if (ret) |
251 | return ret; |
252 | st = s->streams[0]; |
253 | |
254 | /* parse each chunk */ |
255 | found_data = 0; |
256 | while (!avio_feof(pb)) { |
257 | |
258 | /* stop at data chunk if seeking is not supported or |
259 | data chunk size is unknown */ |
260 | if (found_data && (caf->data_size < 0 || !(pb->seekable & AVIO_SEEKABLE_NORMAL))) |
261 | break; |
262 | |
263 | tag = avio_rb32(pb); |
264 | size = avio_rb64(pb); |
265 | pos = avio_tell(pb); |
266 | if (avio_feof(pb)) |
267 | break; |
268 | |
269 | switch (tag) { |
270 | case MKBETAG('d','a','t','a'): |
271 | avio_skip(pb, 4); /* edit count */ |
272 | caf->data_start = avio_tell(pb); |
273 | caf->data_size = size < 0 ? -1 : size - 4; |
274 | if (caf->data_size > 0 && (pb->seekable & AVIO_SEEKABLE_NORMAL)) |
275 | avio_skip(pb, caf->data_size); |
276 | found_data = 1; |
277 | break; |
278 | |
279 | case MKBETAG('c','h','a','n'): |
280 | if ((ret = ff_mov_read_chan(s, s->pb, st, size)) < 0) |
281 | return ret; |
282 | break; |
283 | |
284 | /* magic cookie chunk */ |
285 | case MKBETAG('k','u','k','i'): |
286 | if (read_kuki_chunk(s, size)) |
287 | return AVERROR_INVALIDDATA; |
288 | break; |
289 | |
290 | /* packet table chunk */ |
291 | case MKBETAG('p','a','k','t'): |
292 | if (read_pakt_chunk(s, size)) |
293 | return AVERROR_INVALIDDATA; |
294 | break; |
295 | |
296 | case MKBETAG('i','n','f','o'): |
297 | read_info_chunk(s, size); |
298 | break; |
299 | |
300 | default: |
301 | av_log(s, AV_LOG_WARNING, |
302 | "skipping CAF chunk: %08"PRIX32" (%s), size %"PRId64"\n", |
303 | tag, av_fourcc2str(av_bswap32(tag)), size); |
304 | case MKBETAG('f','r','e','e'): |
305 | if (size < 0) |
306 | return AVERROR_INVALIDDATA; |
307 | break; |
308 | } |
309 | |
310 | if (size > 0) { |
311 | if (pos > INT64_MAX - size) |
312 | return AVERROR_INVALIDDATA; |
313 | avio_skip(pb, FFMAX(0, pos + size - avio_tell(pb))); |
314 | } |
315 | } |
316 | |
317 | if (!found_data) |
318 | return AVERROR_INVALIDDATA; |
319 | |
320 | if (caf->bytes_per_packet > 0 && caf->frames_per_packet > 0) { |
321 | if (caf->data_size > 0) |
322 | st->nb_frames = (caf->data_size / caf->bytes_per_packet) * caf->frames_per_packet; |
323 | } else if (st->nb_index_entries && st->duration > 0) { |
324 | if (st->codecpar->sample_rate && caf->data_size / st->duration > INT64_MAX / st->codecpar->sample_rate / 8) { |
325 | av_log(s, AV_LOG_ERROR, "Overflow during bit rate calculation %d * 8 * %"PRId64"\n", |
326 | st->codecpar->sample_rate, caf->data_size / st->duration); |
327 | return AVERROR_INVALIDDATA; |
328 | } |
329 | st->codecpar->bit_rate = st->codecpar->sample_rate * 8LL * |
330 | (caf->data_size / st->duration); |
331 | } else { |
332 | av_log(s, AV_LOG_ERROR, "Missing packet table. It is required when " |
333 | "block size or frame size are variable.\n"); |
334 | return AVERROR_INVALIDDATA; |
335 | } |
336 | |
337 | avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); |
338 | st->start_time = 0; |
339 | |
340 | /* position the stream at the start of data */ |
341 | if (caf->data_size >= 0) |
342 | avio_seek(pb, caf->data_start, SEEK_SET); |
343 | |
344 | return 0; |
345 | } |
346 | |
347 | #define CAF_MAX_PKT_SIZE 4096 |
348 | |
349 | static int read_packet(AVFormatContext *s, AVPacket *pkt) |
350 | { |
351 | AVIOContext *pb = s->pb; |
352 | AVStream *st = s->streams[0]; |
353 | CafContext *caf = s->priv_data; |
354 | int res, pkt_size = 0, pkt_frames = 0; |
355 | int64_t left = CAF_MAX_PKT_SIZE; |
356 | |
357 | if (avio_feof(pb)) |
358 | return AVERROR_EOF; |
359 | |
360 | /* don't read past end of data chunk */ |
361 | if (caf->data_size > 0) { |
362 | left = (caf->data_start + caf->data_size) - avio_tell(pb); |
363 | if (!left) |
364 | return AVERROR_EOF; |
365 | if (left < 0) |
366 | return AVERROR(EIO); |
367 | } |
368 | |
369 | pkt_frames = caf->frames_per_packet; |
370 | pkt_size = caf->bytes_per_packet; |
371 | |
372 | if (pkt_size > 0 && pkt_frames == 1) { |
373 | pkt_size = (CAF_MAX_PKT_SIZE / pkt_size) * pkt_size; |
374 | pkt_size = FFMIN(pkt_size, left); |
375 | pkt_frames = pkt_size / caf->bytes_per_packet; |
376 | } else if (st->nb_index_entries) { |
377 | if (caf->packet_cnt < st->nb_index_entries - 1) { |
378 | pkt_size = st->index_entries[caf->packet_cnt + 1].pos - st->index_entries[caf->packet_cnt].pos; |
379 | pkt_frames = st->index_entries[caf->packet_cnt + 1].timestamp - st->index_entries[caf->packet_cnt].timestamp; |
380 | } else if (caf->packet_cnt == st->nb_index_entries - 1) { |
381 | pkt_size = caf->num_bytes - st->index_entries[caf->packet_cnt].pos; |
382 | pkt_frames = st->duration - st->index_entries[caf->packet_cnt].timestamp; |
383 | } else { |
384 | return AVERROR(EIO); |
385 | } |
386 | } |
387 | |
388 | if (pkt_size == 0 || pkt_frames == 0 || pkt_size > left) |
389 | return AVERROR(EIO); |
390 | |
391 | res = av_get_packet(pb, pkt, pkt_size); |
392 | if (res < 0) |
393 | return res; |
394 | |
395 | pkt->size = res; |
396 | pkt->stream_index = 0; |
397 | pkt->dts = pkt->pts = caf->frame_cnt; |
398 | |
399 | caf->packet_cnt++; |
400 | caf->frame_cnt += pkt_frames; |
401 | |
402 | return 0; |
403 | } |
404 | |
405 | static int read_seek(AVFormatContext *s, int stream_index, |
406 | int64_t timestamp, int flags) |
407 | { |
408 | AVStream *st = s->streams[0]; |
409 | CafContext *caf = s->priv_data; |
410 | int64_t pos, packet_cnt, frame_cnt; |
411 | |
412 | timestamp = FFMAX(timestamp, 0); |
413 | |
414 | if (caf->frames_per_packet > 0 && caf->bytes_per_packet > 0) { |
415 | /* calculate new byte position based on target frame position */ |
416 | pos = caf->bytes_per_packet * (timestamp / caf->frames_per_packet); |
417 | if (caf->data_size > 0) |
418 | pos = FFMIN(pos, caf->data_size); |
419 | packet_cnt = pos / caf->bytes_per_packet; |
420 | frame_cnt = caf->frames_per_packet * packet_cnt; |
421 | } else if (st->nb_index_entries) { |
422 | packet_cnt = av_index_search_timestamp(st, timestamp, flags); |
423 | frame_cnt = st->index_entries[packet_cnt].timestamp; |
424 | pos = st->index_entries[packet_cnt].pos; |
425 | } else { |
426 | return -1; |
427 | } |
428 | |
429 | if (avio_seek(s->pb, pos + caf->data_start, SEEK_SET) < 0) |
430 | return -1; |
431 | |
432 | caf->packet_cnt = packet_cnt; |
433 | caf->frame_cnt = frame_cnt; |
434 | |
435 | return 0; |
436 | } |
437 | |
438 | AVInputFormat ff_caf_demuxer = { |
439 | .name = "caf", |
440 | .long_name = NULL_IF_CONFIG_SMALL("Apple CAF (Core Audio Format)"), |
441 | .priv_data_size = sizeof(CafContext), |
442 | .read_probe = probe, |
443 | .read_header = read_header, |
444 | .read_packet = read_packet, |
445 | .read_seek = read_seek, |
446 | .codec_tag = (const AVCodecTag* const []){ ff_codec_caf_tags, 0 }, |
447 | }; |
448 |