blob: 108f909c84872d5b90a3d22024a6300bd8ae932a
1 | /* |
2 | * Copyright (c) 2012-2013 Clément Bœsch <u pkh me> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include "avformat.h" |
22 | #include "subtitles.h" |
23 | #include "avio_internal.h" |
24 | #include "libavutil/avassert.h" |
25 | #include "libavutil/avstring.h" |
26 | |
27 | void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb) |
28 | { |
29 | int i; |
30 | r->pb = pb; |
31 | r->buf_pos = r->buf_len = 0; |
32 | r->type = FF_UTF_8; |
33 | for (i = 0; i < 2; i++) |
34 | r->buf[r->buf_len++] = avio_r8(r->pb); |
35 | if (strncmp("\xFF\xFE", r->buf, 2) == 0) { |
36 | r->type = FF_UTF16LE; |
37 | r->buf_pos += 2; |
38 | } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) { |
39 | r->type = FF_UTF16BE; |
40 | r->buf_pos += 2; |
41 | } else { |
42 | r->buf[r->buf_len++] = avio_r8(r->pb); |
43 | if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) { |
44 | // UTF8 |
45 | r->buf_pos += 3; |
46 | } |
47 | } |
48 | if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE)) |
49 | av_log(s, AV_LOG_INFO, |
50 | "UTF16 is automatically converted to UTF8, do not specify a character encoding\n"); |
51 | } |
52 | |
53 | void ff_text_init_buf(FFTextReader *r, void *buf, size_t size) |
54 | { |
55 | memset(&r->buf_pb, 0, sizeof(r->buf_pb)); |
56 | ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL); |
57 | ff_text_init_avio(NULL, r, &r->buf_pb); |
58 | } |
59 | |
60 | int64_t ff_text_pos(FFTextReader *r) |
61 | { |
62 | return avio_tell(r->pb) - r->buf_len + r->buf_pos; |
63 | } |
64 | |
65 | int ff_text_r8(FFTextReader *r) |
66 | { |
67 | uint32_t val; |
68 | uint8_t tmp; |
69 | if (r->buf_pos < r->buf_len) |
70 | return r->buf[r->buf_pos++]; |
71 | if (r->type == FF_UTF16LE) { |
72 | GET_UTF16(val, avio_rl16(r->pb), return 0;) |
73 | } else if (r->type == FF_UTF16BE) { |
74 | GET_UTF16(val, avio_rb16(r->pb), return 0;) |
75 | } else { |
76 | return avio_r8(r->pb); |
77 | } |
78 | if (!val) |
79 | return 0; |
80 | r->buf_pos = 0; |
81 | r->buf_len = 0; |
82 | PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;) |
83 | return r->buf[r->buf_pos++]; // buf_len is at least 1 |
84 | } |
85 | |
86 | void ff_text_read(FFTextReader *r, char *buf, size_t size) |
87 | { |
88 | for ( ; size > 0; size--) |
89 | *buf++ = ff_text_r8(r); |
90 | } |
91 | |
92 | int ff_text_eof(FFTextReader *r) |
93 | { |
94 | return r->buf_pos >= r->buf_len && avio_feof(r->pb); |
95 | } |
96 | |
97 | int ff_text_peek_r8(FFTextReader *r) |
98 | { |
99 | int c; |
100 | if (r->buf_pos < r->buf_len) |
101 | return r->buf[r->buf_pos]; |
102 | c = ff_text_r8(r); |
103 | if (!avio_feof(r->pb)) { |
104 | r->buf_pos = 0; |
105 | r->buf_len = 1; |
106 | r->buf[0] = c; |
107 | } |
108 | return c; |
109 | } |
110 | |
111 | AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, |
112 | const uint8_t *event, size_t len, int merge) |
113 | { |
114 | AVPacket *subs, *sub; |
115 | |
116 | if (merge && q->nb_subs > 0) { |
117 | /* merge with previous event */ |
118 | |
119 | int old_len; |
120 | sub = &q->subs[q->nb_subs - 1]; |
121 | old_len = sub->size; |
122 | if (av_grow_packet(sub, len) < 0) |
123 | return NULL; |
124 | memcpy(sub->data + old_len, event, len); |
125 | } else { |
126 | /* new event */ |
127 | |
128 | if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1) |
129 | return NULL; |
130 | subs = av_fast_realloc(q->subs, &q->allocated_size, |
131 | (q->nb_subs + 1) * sizeof(*q->subs)); |
132 | if (!subs) |
133 | return NULL; |
134 | q->subs = subs; |
135 | sub = &subs[q->nb_subs++]; |
136 | if (av_new_packet(sub, len) < 0) |
137 | return NULL; |
138 | sub->flags |= AV_PKT_FLAG_KEY; |
139 | sub->pts = sub->dts = 0; |
140 | memcpy(sub->data, event, len); |
141 | } |
142 | return sub; |
143 | } |
144 | |
145 | static int cmp_pkt_sub_ts_pos(const void *a, const void *b) |
146 | { |
147 | const AVPacket *s1 = a; |
148 | const AVPacket *s2 = b; |
149 | if (s1->pts == s2->pts) |
150 | return FFDIFFSIGN(s1->pos, s2->pos); |
151 | return FFDIFFSIGN(s1->pts , s2->pts); |
152 | } |
153 | |
154 | static int cmp_pkt_sub_pos_ts(const void *a, const void *b) |
155 | { |
156 | const AVPacket *s1 = a; |
157 | const AVPacket *s2 = b; |
158 | if (s1->pos == s2->pos) { |
159 | if (s1->pts == s2->pts) |
160 | return 0; |
161 | return s1->pts > s2->pts ? 1 : -1; |
162 | } |
163 | return s1->pos > s2->pos ? 1 : -1; |
164 | } |
165 | |
166 | static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q) |
167 | { |
168 | int i, drop = 0; |
169 | |
170 | for (i = 1; i < q->nb_subs; i++) { |
171 | const int last_id = i - 1 - drop; |
172 | const AVPacket *last = &q->subs[last_id]; |
173 | |
174 | if (q->subs[i].pts == last->pts && |
175 | q->subs[i].duration == last->duration && |
176 | q->subs[i].stream_index == last->stream_index && |
177 | !strcmp(q->subs[i].data, last->data)) { |
178 | |
179 | av_packet_unref(&q->subs[i]); |
180 | drop++; |
181 | } else if (drop) { |
182 | q->subs[last_id + 1] = q->subs[i]; |
183 | memset(&q->subs[i], 0, sizeof(q->subs[i])); // for safety |
184 | } |
185 | } |
186 | |
187 | if (drop) { |
188 | q->nb_subs -= drop; |
189 | av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", drop); |
190 | } |
191 | } |
192 | |
193 | void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q) |
194 | { |
195 | int i; |
196 | |
197 | qsort(q->subs, q->nb_subs, sizeof(*q->subs), |
198 | q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos |
199 | : cmp_pkt_sub_pos_ts); |
200 | for (i = 0; i < q->nb_subs; i++) |
201 | if (q->subs[i].duration < 0 && i < q->nb_subs - 1) |
202 | q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts; |
203 | |
204 | if (!q->keep_duplicates) |
205 | drop_dups(log_ctx, q); |
206 | } |
207 | |
208 | int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt) |
209 | { |
210 | AVPacket *sub = q->subs + q->current_sub_idx; |
211 | |
212 | if (q->current_sub_idx == q->nb_subs) |
213 | return AVERROR_EOF; |
214 | if (av_copy_packet(pkt, sub) < 0) { |
215 | return AVERROR(ENOMEM); |
216 | } |
217 | |
218 | pkt->dts = pkt->pts; |
219 | q->current_sub_idx++; |
220 | return 0; |
221 | } |
222 | |
223 | static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts) |
224 | { |
225 | int s1 = 0, s2 = q->nb_subs - 1; |
226 | |
227 | if (s2 < s1) |
228 | return AVERROR(ERANGE); |
229 | |
230 | for (;;) { |
231 | int mid; |
232 | |
233 | if (s1 == s2) |
234 | return s1; |
235 | if (s1 == s2 - 1) |
236 | return q->subs[s1].pts <= q->subs[s2].pts ? s1 : s2; |
237 | mid = (s1 + s2) / 2; |
238 | if (q->subs[mid].pts <= ts) |
239 | s1 = mid; |
240 | else |
241 | s2 = mid; |
242 | } |
243 | } |
244 | |
245 | int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, |
246 | int64_t min_ts, int64_t ts, int64_t max_ts, int flags) |
247 | { |
248 | if (flags & AVSEEK_FLAG_BYTE) { |
249 | return AVERROR(ENOSYS); |
250 | } else if (flags & AVSEEK_FLAG_FRAME) { |
251 | if (ts < 0 || ts >= q->nb_subs) |
252 | return AVERROR(ERANGE); |
253 | q->current_sub_idx = ts; |
254 | } else { |
255 | int i, idx = search_sub_ts(q, ts); |
256 | int64_t ts_selected; |
257 | |
258 | if (idx < 0) |
259 | return idx; |
260 | for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++) |
261 | if (stream_index == -1 || q->subs[i].stream_index == stream_index) |
262 | idx = i; |
263 | for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--) |
264 | if (stream_index == -1 || q->subs[i].stream_index == stream_index) |
265 | idx = i; |
266 | |
267 | ts_selected = q->subs[idx].pts; |
268 | if (ts_selected < min_ts || ts_selected > max_ts) |
269 | return AVERROR(ERANGE); |
270 | |
271 | /* look back in the latest subtitles for overlapping subtitles */ |
272 | for (i = idx - 1; i >= 0; i--) { |
273 | int64_t pts = q->subs[i].pts; |
274 | if (q->subs[i].duration <= 0 || |
275 | (stream_index != -1 && q->subs[i].stream_index != stream_index)) |
276 | continue; |
277 | if (pts >= min_ts && pts > ts_selected - q->subs[i].duration) |
278 | idx = i; |
279 | else |
280 | break; |
281 | } |
282 | |
283 | /* If the queue is used to store multiple subtitles streams (like with |
284 | * VobSub) and the stream index is not specified, we need to make sure |
285 | * to focus on the smallest file position offset for a same timestamp; |
286 | * queue is ordered by pts and then filepos, so we can take the first |
287 | * entry for a given timestamp. */ |
288 | if (stream_index == -1) |
289 | while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts) |
290 | idx--; |
291 | |
292 | q->current_sub_idx = idx; |
293 | } |
294 | return 0; |
295 | } |
296 | |
297 | void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q) |
298 | { |
299 | int i; |
300 | |
301 | for (i = 0; i < q->nb_subs; i++) |
302 | av_packet_unref(&q->subs[i]); |
303 | av_freep(&q->subs); |
304 | q->nb_subs = q->allocated_size = q->current_sub_idx = 0; |
305 | } |
306 | |
307 | int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c) |
308 | { |
309 | int i = 0; |
310 | char end_chr; |
311 | |
312 | if (!*c) // cached char? |
313 | *c = ff_text_r8(tr); |
314 | if (!*c) |
315 | return 0; |
316 | |
317 | end_chr = *c == '<' ? '>' : '<'; |
318 | do { |
319 | av_bprint_chars(buf, *c, 1); |
320 | *c = ff_text_r8(tr); |
321 | i++; |
322 | } while (*c != end_chr && *c); |
323 | if (end_chr == '>') { |
324 | av_bprint_chars(buf, '>', 1); |
325 | *c = 0; |
326 | } |
327 | return i; |
328 | } |
329 | |
/**
 * Locate the value of an attribute inside a SMIL tag string.
 *
 * The string is scanned word by word (whitespace inside double quotes does
 * not separate words). The first word is always skipped; each following
 * word is compared case-insensitively with attr and must be followed
 * directly by '='.
 *
 * @param s    tag text to search
 * @param attr attribute name to look for
 * @return pointer to the first character of the value, past the '=' and
 *         past an opening double quote if there is one, or NULL when the
 *         attribute is not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const size_t attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* advance to the next whitespace that is outside quotes */
        for (; *s; s++) {
            if (!quoted && av_isspace(*s))
                break;
            if (*s == '"') // XXX: support escaping?
                quoted = !quoted;
        }
        while (av_isspace(*s))
            s++;

        /* the name must match before s[attr_len] is looked at, so the
         * string is never read past its terminator */
        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=') {
            const char *value = s + attr_len + 1;
            if (*value == '"')
                value++;
            return value;
        }
    }
    return NULL;
}
349 | |
/** Return 1 when c is a carriage return or a line feed, 0 otherwise. */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
354 | |
355 | void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf) |
356 | { |
357 | char eol_buf[5], last_was_cr = 0; |
358 | int n = 0, i = 0, nb_eol = 0; |
359 | |
360 | av_bprint_clear(buf); |
361 | |
362 | for (;;) { |
363 | char c = ff_text_r8(tr); |
364 | |
365 | if (!c) |
366 | break; |
367 | |
368 | /* ignore all initial line breaks */ |
369 | if (n == 0 && is_eol(c)) |
370 | continue; |
371 | |
372 | /* line break buffering: we don't want to add the trailing \r\n */ |
373 | if (is_eol(c)) { |
374 | nb_eol += c == '\n' || last_was_cr; |
375 | if (nb_eol == 2) |
376 | break; |
377 | eol_buf[i++] = c; |
378 | if (i == sizeof(eol_buf) - 1) |
379 | break; |
380 | last_was_cr = c == '\r'; |
381 | continue; |
382 | } |
383 | |
384 | /* only one line break followed by data: we flush the line breaks |
385 | * buffer */ |
386 | if (i) { |
387 | eol_buf[i] = 0; |
388 | av_bprintf(buf, "%s", eol_buf); |
389 | i = nb_eol = 0; |
390 | } |
391 | |
392 | av_bprint_chars(buf, c, 1); |
393 | n++; |
394 | } |
395 | } |
396 | |
397 | void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf) |
398 | { |
399 | FFTextReader tr; |
400 | tr.buf_pos = tr.buf_len = 0; |
401 | tr.type = 0; |
402 | tr.pb = pb; |
403 | ff_subtitles_read_text_chunk(&tr, buf); |
404 | } |
405 | |
406 | ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size) |
407 | { |
408 | size_t cur = 0; |
409 | if (!size) |
410 | return 0; |
411 | while (cur + 1 < size) { |
412 | unsigned char c = ff_text_r8(tr); |
413 | if (!c) |
414 | return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA; |
415 | if (c == '\r' || c == '\n') |
416 | break; |
417 | buf[cur++] = c; |
418 | buf[cur] = '\0'; |
419 | } |
420 | if (ff_text_peek_r8(tr) == '\r') |
421 | ff_text_r8(tr); |
422 | if (ff_text_peek_r8(tr) == '\n') |
423 | ff_text_r8(tr); |
424 | return cur; |
425 | } |
426 |