blob: 20e01e206e6c371770367a0c950295ba81e32798
1 | /* |
2 | * 3GPP TS 26.245 Timed Text encoder |
3 | * Copyright (c) 2012 Philip Langdale <philipl@overt.org> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include <stdarg.h> |
23 | #include "avcodec.h" |
24 | #include "libavutil/avassert.h" |
25 | #include "libavutil/avstring.h" |
26 | #include "libavutil/intreadwrite.h" |
27 | #include "libavutil/mem.h" |
28 | #include "libavutil/common.h" |
29 | #include "ass_split.h" |
30 | #include "ass.h" |
31 | |
/* Bits of the face-style-flags byte written for each style record. */
#define STYLE_FLAG_BOLD         (1<<0)
#define STYLE_FLAG_ITALIC       (1<<1)
#define STYLE_FLAG_UNDERLINE    (1<<2)
/* Bytes appended per style record by encode_styl(): 2+2+2+1+1+4. */
#define STYLE_RECORD_SIZE       12
/* Fixed part of the 'styl' box: 4-byte size, 4-byte type, 2-byte entry count. */
#define SIZE_ADD                10

/* Bits of MovTextContext.box_flags: which boxes the current sample needs. */
#define STYL_BOX   (1<<0)
#define HLIT_BOX   (1<<1)
#define HCLR_BOX   (1<<2)

/*
 * Append `size` raw bytes starting at `data` (any object pointer) to an
 * AVBPrint. Every argument is parenthesized so that expressions such as
 * `ptr + 1` are cast as a whole instead of having the cast bind to `ptr` only.
 */
#define av_bprint_append_any(buf, data, size) av_bprint_append_data((buf), (const char *)(data), (size))
43 | |
/*
 * One run of styled text. style_start/style_end are filled from
 * AV_RB16(&text_pos) by the style callback and later appended to the output
 * as raw bytes, so they hold the byte-swapped (big-endian in memory) position.
 */
typedef struct StyleBox {
    uint16_t style_start;   /* text position where the run begins */
    uint16_t style_end;     /* text position where the run ends */
    uint8_t  style_flag;    /* OR of STYLE_FLAG_* bits active in the run */
} StyleBox;
49 | |
/*
 * Payload of the 'hlit' box. Both fields are stored via AV_RB16(&text_pos)
 * and appended to the output as raw bytes by encode_hlit().
 */
typedef struct HighlightBox {
    uint16_t start;     /* text position where the highlight begins */
    uint16_t end;       /* text position where the highlight ends */
} HighlightBox;
54 | |
/* Payload of the 'hclr' box: the highlight colour set by the colour callback. */
typedef struct HilightcolorBox {
    uint32_t color;     /* callback colour with the top byte forced to 0xFF */
} HilightcolorBox;
58 | |
59 | typedef struct { |
60 | ASSSplitContext *ass_ctx; |
61 | AVBPrint buffer; |
62 | StyleBox **style_attributes; |
63 | StyleBox *style_attributes_temp; |
64 | HighlightBox hlit; |
65 | HilightcolorBox hclr; |
66 | int count; |
67 | uint8_t box_flags; |
68 | uint16_t style_entries; |
69 | uint16_t style_fontID; |
70 | uint8_t style_fontsize; |
71 | uint32_t style_color; |
72 | uint16_t text_pos; |
73 | } MovTextContext; |
74 | |
75 | typedef struct { |
76 | uint32_t type; |
77 | void (*encode)(MovTextContext *s, uint32_t tsmb_type); |
78 | } Box; |
79 | |
80 | static void mov_text_cleanup(MovTextContext *s) |
81 | { |
82 | int j; |
83 | if (s->box_flags & STYL_BOX) { |
84 | for (j = 0; j < s->count; j++) { |
85 | av_freep(&s->style_attributes[j]); |
86 | } |
87 | av_freep(&s->style_attributes); |
88 | } |
89 | } |
90 | |
91 | static void encode_styl(MovTextContext *s, uint32_t tsmb_type) |
92 | { |
93 | int j; |
94 | uint32_t tsmb_size; |
95 | if (s->box_flags & STYL_BOX) { |
96 | tsmb_size = s->count * STYLE_RECORD_SIZE + SIZE_ADD; |
97 | tsmb_size = AV_RB32(&tsmb_size); |
98 | s->style_entries = AV_RB16(&s->count); |
99 | s->style_fontID = 0x00 | 0x01<<8; |
100 | s->style_fontsize = 0x12; |
101 | s->style_color = MKTAG(0xFF, 0xFF, 0xFF, 0xFF); |
102 | /*The above three attributes are hard coded for now |
103 | but will come from ASS style in the future*/ |
104 | av_bprint_append_any(&s->buffer, &tsmb_size, 4); |
105 | av_bprint_append_any(&s->buffer, &tsmb_type, 4); |
106 | av_bprint_append_any(&s->buffer, &s->style_entries, 2); |
107 | for (j = 0; j < s->count; j++) { |
108 | av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_start, 2); |
109 | av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_end, 2); |
110 | av_bprint_append_any(&s->buffer, &s->style_fontID, 2); |
111 | av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_flag, 1); |
112 | av_bprint_append_any(&s->buffer, &s->style_fontsize, 1); |
113 | av_bprint_append_any(&s->buffer, &s->style_color, 4); |
114 | } |
115 | mov_text_cleanup(s); |
116 | } |
117 | } |
118 | |
119 | static void encode_hlit(MovTextContext *s, uint32_t tsmb_type) |
120 | { |
121 | uint32_t tsmb_size; |
122 | if (s->box_flags & HLIT_BOX) { |
123 | tsmb_size = 12; |
124 | tsmb_size = AV_RB32(&tsmb_size); |
125 | av_bprint_append_any(&s->buffer, &tsmb_size, 4); |
126 | av_bprint_append_any(&s->buffer, &tsmb_type, 4); |
127 | av_bprint_append_any(&s->buffer, &s->hlit.start, 2); |
128 | av_bprint_append_any(&s->buffer, &s->hlit.end, 2); |
129 | } |
130 | } |
131 | |
132 | static void encode_hclr(MovTextContext *s, uint32_t tsmb_type) |
133 | { |
134 | uint32_t tsmb_size; |
135 | if (s->box_flags & HCLR_BOX) { |
136 | tsmb_size = 12; |
137 | tsmb_size = AV_RB32(&tsmb_size); |
138 | av_bprint_append_any(&s->buffer, &tsmb_size, 4); |
139 | av_bprint_append_any(&s->buffer, &tsmb_type, 4); |
140 | av_bprint_append_any(&s->buffer, &s->hclr.color, 4); |
141 | } |
142 | } |
143 | |
144 | static const Box box_types[] = { |
145 | { MKTAG('s','t','y','l'), encode_styl }, |
146 | { MKTAG('h','l','i','t'), encode_hlit }, |
147 | { MKTAG('h','c','l','r'), encode_hclr }, |
148 | }; |
149 | |
150 | const static size_t box_count = FF_ARRAY_ELEMS(box_types); |
151 | |
152 | static av_cold int mov_text_encode_init(AVCodecContext *avctx) |
153 | { |
154 | /* |
155 | * For now, we'll use a fixed default style. When we add styling |
156 | * support, this will be generated from the ASS style. |
157 | */ |
158 | static const uint8_t text_sample_entry[] = { |
159 | 0x00, 0x00, 0x00, 0x00, // uint32_t displayFlags |
160 | 0x01, // int8_t horizontal-justification |
161 | 0xFF, // int8_t vertical-justification |
162 | 0x00, 0x00, 0x00, 0x00, // uint8_t background-color-rgba[4] |
163 | // BoxRecord { |
164 | 0x00, 0x00, // int16_t top |
165 | 0x00, 0x00, // int16_t left |
166 | 0x00, 0x00, // int16_t bottom |
167 | 0x00, 0x00, // int16_t right |
168 | // }; |
169 | // StyleRecord { |
170 | 0x00, 0x00, // uint16_t startChar |
171 | 0x00, 0x00, // uint16_t endChar |
172 | 0x00, 0x01, // uint16_t font-ID |
173 | 0x00, // uint8_t face-style-flags |
174 | 0x12, // uint8_t font-size |
175 | 0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4] |
176 | // }; |
177 | // FontTableBox { |
178 | 0x00, 0x00, 0x00, 0x12, // uint32_t size |
179 | 'f', 't', 'a', 'b', // uint8_t name[4] |
180 | 0x00, 0x01, // uint16_t entry-count |
181 | // FontRecord { |
182 | 0x00, 0x01, // uint16_t font-ID |
183 | 0x05, // uint8_t font-name-length |
184 | 'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length] |
185 | // }; |
186 | // }; |
187 | }; |
188 | |
189 | MovTextContext *s = avctx->priv_data; |
190 | |
191 | avctx->extradata_size = sizeof text_sample_entry; |
192 | avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); |
193 | if (!avctx->extradata) |
194 | return AVERROR(ENOMEM); |
195 | |
196 | av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED); |
197 | |
198 | memcpy(avctx->extradata, text_sample_entry, avctx->extradata_size); |
199 | |
200 | s->ass_ctx = ff_ass_split(avctx->subtitle_header); |
201 | return s->ass_ctx ? 0 : AVERROR_INVALIDDATA; |
202 | } |
203 | |
204 | static void mov_text_style_cb(void *priv, const char style, int close) |
205 | { |
206 | MovTextContext *s = priv; |
207 | if (!close) { |
208 | if (!(s->box_flags & STYL_BOX)) { //first style entry |
209 | |
210 | s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp)); |
211 | |
212 | if (!s->style_attributes_temp) { |
213 | av_bprint_clear(&s->buffer); |
214 | s->box_flags &= ~STYL_BOX; |
215 | return; |
216 | } |
217 | |
218 | s->style_attributes_temp->style_flag = 0; |
219 | s->style_attributes_temp->style_start = AV_RB16(&s->text_pos); |
220 | } else { |
221 | if (s->style_attributes_temp->style_flag) { //break the style record here and start a new one |
222 | s->style_attributes_temp->style_end = AV_RB16(&s->text_pos); |
223 | av_dynarray_add(&s->style_attributes, &s->count, s->style_attributes_temp); |
224 | s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp)); |
225 | if (!s->style_attributes_temp) { |
226 | mov_text_cleanup(s); |
227 | av_bprint_clear(&s->buffer); |
228 | s->box_flags &= ~STYL_BOX; |
229 | return; |
230 | } |
231 | |
232 | s->style_attributes_temp->style_flag = s->style_attributes[s->count - 1]->style_flag; |
233 | s->style_attributes_temp->style_start = AV_RB16(&s->text_pos); |
234 | } else { |
235 | s->style_attributes_temp->style_flag = 0; |
236 | s->style_attributes_temp->style_start = AV_RB16(&s->text_pos); |
237 | } |
238 | } |
239 | switch (style){ |
240 | case 'b': |
241 | s->style_attributes_temp->style_flag |= STYLE_FLAG_BOLD; |
242 | break; |
243 | case 'i': |
244 | s->style_attributes_temp->style_flag |= STYLE_FLAG_ITALIC; |
245 | break; |
246 | case 'u': |
247 | s->style_attributes_temp->style_flag |= STYLE_FLAG_UNDERLINE; |
248 | break; |
249 | } |
250 | } else { |
251 | s->style_attributes_temp->style_end = AV_RB16(&s->text_pos); |
252 | av_dynarray_add(&s->style_attributes, &s->count, s->style_attributes_temp); |
253 | |
254 | s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp)); |
255 | |
256 | if (!s->style_attributes_temp) { |
257 | mov_text_cleanup(s); |
258 | av_bprint_clear(&s->buffer); |
259 | s->box_flags &= ~STYL_BOX; |
260 | return; |
261 | } |
262 | |
263 | s->style_attributes_temp->style_flag = s->style_attributes[s->count - 1]->style_flag; |
264 | switch (style){ |
265 | case 'b': |
266 | s->style_attributes_temp->style_flag &= ~STYLE_FLAG_BOLD; |
267 | break; |
268 | case 'i': |
269 | s->style_attributes_temp->style_flag &= ~STYLE_FLAG_ITALIC; |
270 | break; |
271 | case 'u': |
272 | s->style_attributes_temp->style_flag &= ~STYLE_FLAG_UNDERLINE; |
273 | break; |
274 | } |
275 | if (s->style_attributes_temp->style_flag) { //start of new style record |
276 | s->style_attributes_temp->style_start = AV_RB16(&s->text_pos); |
277 | } |
278 | } |
279 | s->box_flags |= STYL_BOX; |
280 | } |
281 | |
282 | static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color_id) |
283 | { |
284 | MovTextContext *s = priv; |
285 | if (color_id == 2) { //secondary color changes |
286 | if (s->box_flags & HLIT_BOX) { //close tag |
287 | s->hlit.end = AV_RB16(&s->text_pos); |
288 | } else { |
289 | s->box_flags |= HCLR_BOX; |
290 | s->box_flags |= HLIT_BOX; |
291 | s->hlit.start = AV_RB16(&s->text_pos); |
292 | s->hclr.color = color | (0xFF << 24); //set alpha value to FF |
293 | } |
294 | } |
295 | /* If there are more than one secondary color changes in ASS, take start of |
296 | first section and end of last section. Movtext allows only one |
297 | highlight box per sample. |
298 | */ |
299 | } |
300 | |
301 | static void mov_text_text_cb(void *priv, const char *text, int len) |
302 | { |
303 | MovTextContext *s = priv; |
304 | av_bprint_append_data(&s->buffer, text, len); |
305 | s->text_pos += len; |
306 | } |
307 | |
308 | static void mov_text_new_line_cb(void *priv, int forced) |
309 | { |
310 | MovTextContext *s = priv; |
311 | av_bprint_append_data(&s->buffer, "\n", 1); |
312 | s->text_pos += 1; |
313 | } |
314 | |
315 | static const ASSCodesCallbacks mov_text_callbacks = { |
316 | .text = mov_text_text_cb, |
317 | .new_line = mov_text_new_line_cb, |
318 | .style = mov_text_style_cb, |
319 | .color = mov_text_color_cb, |
320 | }; |
321 | |
322 | static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf, |
323 | int bufsize, const AVSubtitle *sub) |
324 | { |
325 | MovTextContext *s = avctx->priv_data; |
326 | ASSDialog *dialog; |
327 | int i, length; |
328 | size_t j; |
329 | |
330 | s->text_pos = 0; |
331 | s->count = 0; |
332 | s->box_flags = 0; |
333 | s->style_entries = 0; |
334 | for (i = 0; i < sub->num_rects; i++) { |
335 | const char *ass = sub->rects[i]->ass; |
336 | |
337 | if (sub->rects[i]->type != SUBTITLE_ASS) { |
338 | av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n"); |
339 | return AVERROR(ENOSYS); |
340 | } |
341 | |
342 | #if FF_API_ASS_TIMING |
343 | if (!strncmp(ass, "Dialogue: ", 10)) { |
344 | int num; |
345 | dialog = ff_ass_split_dialog(s->ass_ctx, ass, 0, &num); |
346 | for (; dialog && num--; dialog++) { |
347 | ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text); |
348 | } |
349 | } else { |
350 | #endif |
351 | dialog = ff_ass_split_dialog2(s->ass_ctx, ass); |
352 | if (!dialog) |
353 | return AVERROR(ENOMEM); |
354 | ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text); |
355 | ff_ass_free_dialog(&dialog); |
356 | #if FF_API_ASS_TIMING |
357 | } |
358 | #endif |
359 | |
360 | for (j = 0; j < box_count; j++) { |
361 | box_types[j].encode(s, box_types[j].type); |
362 | } |
363 | } |
364 | |
365 | AV_WB16(buf, s->text_pos); |
366 | buf += 2; |
367 | |
368 | if (!av_bprint_is_complete(&s->buffer)) { |
369 | length = AVERROR(ENOMEM); |
370 | goto exit; |
371 | } |
372 | |
373 | if (!s->buffer.len) { |
374 | length = 0; |
375 | goto exit; |
376 | } |
377 | |
378 | if (s->buffer.len > bufsize - 3) { |
379 | av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n"); |
380 | length = AVERROR(EINVAL); |
381 | goto exit; |
382 | } |
383 | |
384 | memcpy(buf, s->buffer.str, s->buffer.len); |
385 | length = s->buffer.len + 2; |
386 | |
387 | exit: |
388 | av_bprint_clear(&s->buffer); |
389 | return length; |
390 | } |
391 | |
392 | static int mov_text_encode_close(AVCodecContext *avctx) |
393 | { |
394 | MovTextContext *s = avctx->priv_data; |
395 | ff_ass_split_free(s->ass_ctx); |
396 | av_bprint_finalize(&s->buffer, NULL); |
397 | return 0; |
398 | } |
399 | |
400 | AVCodec ff_movtext_encoder = { |
401 | .name = "mov_text", |
402 | .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"), |
403 | .type = AVMEDIA_TYPE_SUBTITLE, |
404 | .id = AV_CODEC_ID_MOV_TEXT, |
405 | .priv_data_size = sizeof(MovTextContext), |
406 | .init = mov_text_encode_init, |
407 | .encode_sub = mov_text_encode_frame, |
408 | .close = mov_text_encode_close, |
409 | }; |
410 |