blob: fb5085c3e8222e48d680079d3a6d14b112741492
1 | /* |
2 | * 3GPP TS 26.245 Timed Text decoder |
3 | * Copyright (c) 2012 Philip Langdale <philipl@overt.org> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include "avcodec.h" |
23 | #include "ass.h" |
24 | #include "libavutil/avstring.h" |
25 | #include "libavutil/common.h" |
26 | #include "libavutil/bprint.h" |
27 | #include "libavutil/intreadwrite.h" |
28 | #include "libavutil/mem.h" |
29 | |
/* Bits of the face-style-flags byte carried by a style record. */
#define STYLE_FLAG_BOLD         (1 << 0)
#define STYLE_FLAG_ITALIC       (1 << 1)
#define STYLE_FLAG_UNDERLINE    (1 << 2)

/*
 * Number of tx3g extradata bytes read up to and including the 16-bit font
 * table entry count; mov_text_tx3g() rejects extradata shorter than this.
 */
#define BOX_SIZE_INITIAL        40

/*
 * Bits of MovTextContext.box_flags recording which modifier boxes have been
 * decoded for the sample currently being processed.
 */
#define STYL_BOX                (1 << 0)
#define HLIT_BOX                (1 << 1)
#define HCLR_BOX                (1 << 2)
#define TWRP_BOX                (1 << 3)

/*
 * Values stored in MovTextDefault.alignment and passed on as the alignment
 * argument of ff_ass_subtitle_header().
 */
#define BOTTOM_LEFT             1
#define BOTTOM_CENTER           2
#define BOTTOM_RIGHT            3
#define MIDDLE_LEFT             4
#define MIDDLE_CENTER           5
#define MIDDLE_RIGHT            6
#define TOP_LEFT                7
#define TOP_CENTER              8
#define TOP_RIGHT               9
50 | |
/*
 * Default text style read from the tx3g extradata by mov_text_tx3g() and
 * handed to ff_ass_subtitle_header() by mov_text_init().
 */
typedef struct {
    char *font;        /* default font name; points at storage owned elsewhere */
    int   fontsize;    /* default font size byte */
    int   color;       /* primary text colour, 24 bits read big-endian */
    int   back_color;  /* background colour, 24 bits read big-endian */
    int   bold;        /* non-zero when STYLE_FLAG_BOLD is set */
    int   italic;      /* non-zero when STYLE_FLAG_ITALIC is set */
    int   underline;   /* non-zero when STYLE_FLAG_UNDERLINE is set */
    int   alignment;   /* one of the BOTTOM_LEFT .. TOP_RIGHT values */
} MovTextDefault;
61 | |
/* One entry of the tx3g font table: a font ID and its heap-allocated name. */
typedef struct {
    uint16_t  fontID;  /* identifier that style records refer to */
    char     *font;    /* NUL-terminated font name, freed by mov_text_cleanup_ftab() */
} FontRecord;
66 | |
/* One style record decoded from a 'styl' box. */
typedef struct {
    uint16_t style_start;   /* text position at which the style begins */
    uint16_t style_end;     /* text position at which the style is reset */
    uint8_t  style_flag;    /* STYLE_FLAG_* bits */
    uint8_t  fontsize;      /* font size for the styled run */
    uint16_t style_fontID;  /* looked up in the font table by fontID */
} StyleBox;
74 | |
/* Highlighted text range decoded from an 'hlit' box. */
typedef struct {
    uint16_t hlit_start;  /* text position where the highlight begins */
    uint16_t hlit_end;    /* text position where the highlight ends */
} HighlightBox;
79 | |
/* Highlight colour decoded from an 'hclr' box: four bytes copied verbatim. */
typedef struct {
    uint8_t hlit_color[4];  /* text_to_ass() emits bytes [2], [1], [0] */
} HilightcolorBox;
83 | |
/* Wrap behaviour decoded from a 'twrp' box. */
typedef struct {
    uint8_t wrap_flag;  /* 1 selects "{\q1}" in text_to_ass(), anything else "{\q2}" */
} TextWrapBox;
87 | |
88 | typedef struct { |
89 | StyleBox **s; |
90 | StyleBox *s_temp; |
91 | HighlightBox h; |
92 | HilightcolorBox c; |
93 | FontRecord **ftab; |
94 | FontRecord *ftab_temp; |
95 | TextWrapBox w; |
96 | MovTextDefault d; |
97 | uint8_t box_flags; |
98 | uint16_t style_entries, ftab_entries; |
99 | uint64_t tracksize; |
100 | int size_var; |
101 | int count_s, count_f; |
102 | int readorder; |
103 | } MovTextContext; |
104 | |
105 | typedef struct { |
106 | uint32_t type; |
107 | size_t base_size; |
108 | int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt); |
109 | } Box; |
110 | |
111 | static void mov_text_cleanup(MovTextContext *m) |
112 | { |
113 | int i; |
114 | if (m->box_flags & STYL_BOX) { |
115 | for(i = 0; i < m->count_s; i++) { |
116 | av_freep(&m->s[i]); |
117 | } |
118 | av_freep(&m->s); |
119 | m->count_s = 0; |
120 | m->style_entries = 0; |
121 | } |
122 | } |
123 | |
124 | static void mov_text_cleanup_ftab(MovTextContext *m) |
125 | { |
126 | int i; |
127 | if (m->ftab_temp) |
128 | av_freep(&m->ftab_temp->font); |
129 | av_freep(&m->ftab_temp); |
130 | if (m->ftab) { |
131 | for(i = 0; i < m->count_f; i++) { |
132 | av_freep(&m->ftab[i]->font); |
133 | av_freep(&m->ftab[i]); |
134 | } |
135 | } |
136 | av_freep(&m->ftab); |
137 | } |
138 | |
139 | static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m) |
140 | { |
141 | uint8_t *tx3g_ptr = avctx->extradata; |
142 | int i, box_size, font_length; |
143 | int8_t v_align, h_align; |
144 | int style_fontID; |
145 | StyleBox s_default; |
146 | |
147 | m->count_f = 0; |
148 | m->ftab_entries = 0; |
149 | box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */ |
150 | if (avctx->extradata_size < box_size) |
151 | return -1; |
152 | |
153 | // Display Flags |
154 | tx3g_ptr += 4; |
155 | // Alignment |
156 | h_align = *tx3g_ptr++; |
157 | v_align = *tx3g_ptr++; |
158 | if (h_align == 0) { |
159 | if (v_align == 0) |
160 | m->d.alignment = TOP_LEFT; |
161 | if (v_align == 1) |
162 | m->d.alignment = MIDDLE_LEFT; |
163 | if (v_align == -1) |
164 | m->d.alignment = BOTTOM_LEFT; |
165 | } |
166 | if (h_align == 1) { |
167 | if (v_align == 0) |
168 | m->d.alignment = TOP_CENTER; |
169 | if (v_align == 1) |
170 | m->d.alignment = MIDDLE_CENTER; |
171 | if (v_align == -1) |
172 | m->d.alignment = BOTTOM_CENTER; |
173 | } |
174 | if (h_align == -1) { |
175 | if (v_align == 0) |
176 | m->d.alignment = TOP_RIGHT; |
177 | if (v_align == 1) |
178 | m->d.alignment = MIDDLE_RIGHT; |
179 | if (v_align == -1) |
180 | m->d.alignment = BOTTOM_RIGHT; |
181 | } |
182 | // Background Color |
183 | m->d.back_color = AV_RB24(tx3g_ptr); |
184 | tx3g_ptr += 4; |
185 | // BoxRecord |
186 | tx3g_ptr += 8; |
187 | // StyleRecord |
188 | tx3g_ptr += 4; |
189 | // fontID |
190 | style_fontID = AV_RB16(tx3g_ptr); |
191 | tx3g_ptr += 2; |
192 | // face-style-flags |
193 | s_default.style_flag = *tx3g_ptr++; |
194 | m->d.bold = s_default.style_flag & STYLE_FLAG_BOLD; |
195 | m->d.italic = s_default.style_flag & STYLE_FLAG_ITALIC; |
196 | m->d.underline = s_default.style_flag & STYLE_FLAG_UNDERLINE; |
197 | // fontsize |
198 | m->d.fontsize = *tx3g_ptr++; |
199 | // Primary color |
200 | m->d.color = AV_RB24(tx3g_ptr); |
201 | tx3g_ptr += 4; |
202 | // FontRecord |
203 | // FontRecord Size |
204 | tx3g_ptr += 4; |
205 | // ftab |
206 | tx3g_ptr += 4; |
207 | |
208 | m->ftab_entries = AV_RB16(tx3g_ptr); |
209 | tx3g_ptr += 2; |
210 | |
211 | for (i = 0; i < m->ftab_entries; i++) { |
212 | |
213 | box_size += 3; |
214 | if (avctx->extradata_size < box_size) { |
215 | mov_text_cleanup_ftab(m); |
216 | m->ftab_entries = 0; |
217 | return -1; |
218 | } |
219 | m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp)); |
220 | if (!m->ftab_temp) { |
221 | mov_text_cleanup_ftab(m); |
222 | return AVERROR(ENOMEM); |
223 | } |
224 | m->ftab_temp->fontID = AV_RB16(tx3g_ptr); |
225 | tx3g_ptr += 2; |
226 | font_length = *tx3g_ptr++; |
227 | |
228 | box_size = box_size + font_length; |
229 | if (avctx->extradata_size < box_size) { |
230 | mov_text_cleanup_ftab(m); |
231 | m->ftab_entries = 0; |
232 | return -1; |
233 | } |
234 | m->ftab_temp->font = av_malloc(font_length + 1); |
235 | if (!m->ftab_temp->font) { |
236 | mov_text_cleanup_ftab(m); |
237 | return AVERROR(ENOMEM); |
238 | } |
239 | memcpy(m->ftab_temp->font, tx3g_ptr, font_length); |
240 | m->ftab_temp->font[font_length] = '\0'; |
241 | av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp); |
242 | if (!m->ftab) { |
243 | mov_text_cleanup_ftab(m); |
244 | return AVERROR(ENOMEM); |
245 | } |
246 | m->ftab_temp = NULL; |
247 | tx3g_ptr = tx3g_ptr + font_length; |
248 | } |
249 | for (i = 0; i < m->ftab_entries; i++) { |
250 | if (style_fontID == m->ftab[i]->fontID) |
251 | m->d.font = m->ftab[i]->font; |
252 | } |
253 | return 0; |
254 | } |
255 | |
256 | static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt) |
257 | { |
258 | m->box_flags |= TWRP_BOX; |
259 | m->w.wrap_flag = *tsmb++; |
260 | return 0; |
261 | } |
262 | |
263 | static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt) |
264 | { |
265 | m->box_flags |= HLIT_BOX; |
266 | m->h.hlit_start = AV_RB16(tsmb); |
267 | tsmb += 2; |
268 | m->h.hlit_end = AV_RB16(tsmb); |
269 | tsmb += 2; |
270 | return 0; |
271 | } |
272 | |
273 | static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt) |
274 | { |
275 | m->box_flags |= HCLR_BOX; |
276 | memcpy(m->c.hlit_color, tsmb, 4); |
277 | tsmb += 4; |
278 | return 0; |
279 | } |
280 | |
281 | static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt) |
282 | { |
283 | int i; |
284 | int style_entries = AV_RB16(tsmb); |
285 | tsmb += 2; |
286 | // A single style record is of length 12 bytes. |
287 | if (m->tracksize + m->size_var + 2 + style_entries * 12 > avpkt->size) |
288 | return -1; |
289 | |
290 | m->style_entries = style_entries; |
291 | |
292 | m->box_flags |= STYL_BOX; |
293 | for(i = 0; i < m->style_entries; i++) { |
294 | m->s_temp = av_malloc(sizeof(*m->s_temp)); |
295 | if (!m->s_temp) { |
296 | mov_text_cleanup(m); |
297 | return AVERROR(ENOMEM); |
298 | } |
299 | m->s_temp->style_start = AV_RB16(tsmb); |
300 | tsmb += 2; |
301 | m->s_temp->style_end = AV_RB16(tsmb); |
302 | tsmb += 2; |
303 | m->s_temp->style_fontID = AV_RB16(tsmb); |
304 | tsmb += 2; |
305 | m->s_temp->style_flag = AV_RB8(tsmb); |
306 | tsmb++; |
307 | m->s_temp->fontsize = AV_RB8(tsmb); |
308 | av_dynarray_add(&m->s, &m->count_s, m->s_temp); |
309 | if(!m->s) { |
310 | mov_text_cleanup(m); |
311 | return AVERROR(ENOMEM); |
312 | } |
313 | tsmb++; |
314 | // text-color-rgba |
315 | tsmb += 4; |
316 | } |
317 | return 0; |
318 | } |
319 | |
320 | static const Box box_types[] = { |
321 | { MKBETAG('s','t','y','l'), 2, decode_styl }, |
322 | { MKBETAG('h','l','i','t'), 4, decode_hlit }, |
323 | { MKBETAG('h','c','l','r'), 4, decode_hclr }, |
324 | { MKBETAG('t','w','r','p'), 1, decode_twrp } |
325 | }; |
326 | |
327 | const static size_t box_count = FF_ARRAY_ELEMS(box_types); |
328 | |
329 | static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end, |
330 | MovTextContext *m) |
331 | { |
332 | int i = 0; |
333 | int j = 0; |
334 | int text_pos = 0; |
335 | |
336 | if (text < text_end && m->box_flags & TWRP_BOX) { |
337 | if (m->w.wrap_flag == 1) { |
338 | av_bprintf(buf, "{\\q1}"); /* End of line wrap */ |
339 | } else { |
340 | av_bprintf(buf, "{\\q2}"); /* No wrap */ |
341 | } |
342 | } |
343 | |
344 | while (text < text_end) { |
345 | if (m->box_flags & STYL_BOX) { |
346 | for (i = 0; i < m->style_entries; i++) { |
347 | if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) { |
348 | av_bprintf(buf, "{\\r}"); |
349 | } |
350 | } |
351 | for (i = 0; i < m->style_entries; i++) { |
352 | if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) { |
353 | if (m->s[i]->style_flag & STYLE_FLAG_BOLD) |
354 | av_bprintf(buf, "{\\b1}"); |
355 | if (m->s[i]->style_flag & STYLE_FLAG_ITALIC) |
356 | av_bprintf(buf, "{\\i1}"); |
357 | if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE) |
358 | av_bprintf(buf, "{\\u1}"); |
359 | av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize); |
360 | for (j = 0; j < m->ftab_entries; j++) { |
361 | if (m->s[i]->style_fontID == m->ftab[j]->fontID) |
362 | av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font); |
363 | } |
364 | } |
365 | } |
366 | } |
367 | if (m->box_flags & HLIT_BOX) { |
368 | if (text_pos == m->h.hlit_start) { |
369 | /* If hclr box is present, set the secondary color to the color |
370 | * specified. Otherwise, set primary color to white and secondary |
371 | * color to black. These colors will come from TextSampleModifier |
372 | * boxes in future and inverse video technique for highlight will |
373 | * be implemented. |
374 | */ |
375 | if (m->box_flags & HCLR_BOX) { |
376 | av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2], |
377 | m->c.hlit_color[1], m->c.hlit_color[0]); |
378 | } else { |
379 | av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}"); |
380 | } |
381 | } |
382 | if (text_pos == m->h.hlit_end) { |
383 | if (m->box_flags & HCLR_BOX) { |
384 | av_bprintf(buf, "{\\2c&H000000&}"); |
385 | } else { |
386 | av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}"); |
387 | } |
388 | } |
389 | } |
390 | |
391 | switch (*text) { |
392 | case '\r': |
393 | break; |
394 | case '\n': |
395 | av_bprintf(buf, "\\N"); |
396 | break; |
397 | default: |
398 | av_bprint_chars(buf, *text, 1); |
399 | break; |
400 | } |
401 | text++; |
402 | text_pos++; |
403 | } |
404 | |
405 | return 0; |
406 | } |
407 | |
408 | static int mov_text_init(AVCodecContext *avctx) { |
409 | /* |
410 | * TODO: Handle the default text style. |
411 | * NB: Most players ignore styles completely, with the result that |
412 | * it's very common to find files where the default style is broken |
413 | * and respecting it results in a worse experience than ignoring it. |
414 | */ |
415 | int ret; |
416 | MovTextContext *m = avctx->priv_data; |
417 | ret = mov_text_tx3g(avctx, m); |
418 | if (ret == 0) { |
419 | return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color, |
420 | m->d.back_color, m->d.bold, m->d.italic, |
421 | m->d.underline, ASS_DEFAULT_BORDERSTYLE, |
422 | m->d.alignment); |
423 | } else |
424 | return ff_ass_subtitle_header_default(avctx); |
425 | } |
426 | |
427 | static int mov_text_decode_frame(AVCodecContext *avctx, |
428 | void *data, int *got_sub_ptr, AVPacket *avpkt) |
429 | { |
430 | AVSubtitle *sub = data; |
431 | MovTextContext *m = avctx->priv_data; |
432 | int ret; |
433 | AVBPrint buf; |
434 | char *ptr = avpkt->data; |
435 | char *end; |
436 | int text_length, tsmb_type, ret_tsmb; |
437 | uint64_t tsmb_size; |
438 | const uint8_t *tsmb; |
439 | |
440 | if (!ptr || avpkt->size < 2) |
441 | return AVERROR_INVALIDDATA; |
442 | |
443 | /* |
444 | * A packet of size two with value zero is an empty subtitle |
445 | * used to mark the end of the previous non-empty subtitle. |
446 | * We can just drop them here as we have duration information |
447 | * already. If the value is non-zero, then it's technically a |
448 | * bad packet. |
449 | */ |
450 | if (avpkt->size == 2) |
451 | return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA; |
452 | |
453 | /* |
454 | * The first two bytes of the packet are the length of the text string |
455 | * In complex cases, there are style descriptors appended to the string |
456 | * so we can't just assume the packet size is the string size. |
457 | */ |
458 | text_length = AV_RB16(ptr); |
459 | end = ptr + FFMIN(2 + text_length, avpkt->size); |
460 | ptr += 2; |
461 | |
462 | mov_text_cleanup(m); |
463 | |
464 | tsmb_size = 0; |
465 | m->tracksize = 2 + text_length; |
466 | m->style_entries = 0; |
467 | m->box_flags = 0; |
468 | m->count_s = 0; |
469 | // Note that the spec recommends lines be no longer than 2048 characters. |
470 | av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); |
471 | if (text_length + 2 != avpkt->size) { |
472 | while (m->tracksize + 8 <= avpkt->size) { |
473 | // A box is a minimum of 8 bytes. |
474 | tsmb = ptr + m->tracksize - 2; |
475 | tsmb_size = AV_RB32(tsmb); |
476 | tsmb += 4; |
477 | tsmb_type = AV_RB32(tsmb); |
478 | tsmb += 4; |
479 | |
480 | if (tsmb_size == 1) { |
481 | if (m->tracksize + 16 > avpkt->size) |
482 | break; |
483 | tsmb_size = AV_RB64(tsmb); |
484 | tsmb += 8; |
485 | m->size_var = 16; |
486 | } else |
487 | m->size_var = 8; |
488 | //size_var is equal to 8 or 16 depending on the size of box |
489 | |
490 | if (tsmb_size == 0) { |
491 | av_log(avctx, AV_LOG_ERROR, "tsmb_size is 0\n"); |
492 | return AVERROR_INVALIDDATA; |
493 | } |
494 | |
495 | if (tsmb_size > avpkt->size - m->tracksize) |
496 | break; |
497 | |
498 | for (size_t i = 0; i < box_count; i++) { |
499 | if (tsmb_type == box_types[i].type) { |
500 | if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size) |
501 | break; |
502 | ret_tsmb = box_types[i].decode(tsmb, m, avpkt); |
503 | if (ret_tsmb == -1) |
504 | break; |
505 | } |
506 | } |
507 | m->tracksize = m->tracksize + tsmb_size; |
508 | } |
509 | text_to_ass(&buf, ptr, end, m); |
510 | mov_text_cleanup(m); |
511 | } else |
512 | text_to_ass(&buf, ptr, end, m); |
513 | |
514 | ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL); |
515 | av_bprint_finalize(&buf, NULL); |
516 | if (ret < 0) |
517 | return ret; |
518 | *got_sub_ptr = sub->num_rects > 0; |
519 | return avpkt->size; |
520 | } |
521 | |
522 | static int mov_text_decode_close(AVCodecContext *avctx) |
523 | { |
524 | MovTextContext *m = avctx->priv_data; |
525 | mov_text_cleanup_ftab(m); |
526 | mov_text_cleanup(m); |
527 | return 0; |
528 | } |
529 | |
530 | static void mov_text_flush(AVCodecContext *avctx) |
531 | { |
532 | MovTextContext *m = avctx->priv_data; |
533 | if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)) |
534 | m->readorder = 0; |
535 | } |
536 | |
537 | AVCodec ff_movtext_decoder = { |
538 | .name = "mov_text", |
539 | .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"), |
540 | .type = AVMEDIA_TYPE_SUBTITLE, |
541 | .id = AV_CODEC_ID_MOV_TEXT, |
542 | .priv_data_size = sizeof(MovTextContext), |
543 | .init = mov_text_init, |
544 | .decode = mov_text_decode_frame, |
545 | .close = mov_text_decode_close, |
546 | .flush = mov_text_flush, |
547 | }; |
548 |