blob: 16f3f58c1cfeb7fba0bdf91d4ba0ba263901a543
1 | /* |
2 | * Copyright (c) 2012 Clément Bœsch |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | /** |
22 | * @file |
23 | * SAMI subtitle decoder |
24 | * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx |
25 | */ |
26 | |
#include "ass.h"
#include "libavutil/avstring.h"
#include "libavutil/bprint.h"
#include "libavutil/error.h"
#include "htmlsubtitles.h"
31 | |
32 | typedef struct { |
33 | AVBPrint source; |
34 | AVBPrint content; |
35 | AVBPrint encoded_source; |
36 | AVBPrint encoded_content; |
37 | AVBPrint full; |
38 | int readorder; |
39 | } SAMIContext; |
40 | |
41 | static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src) |
42 | { |
43 | SAMIContext *sami = avctx->priv_data; |
44 | int ret = 0; |
45 | char *tag = NULL; |
46 | char *dupsrc = av_strdup(src); |
47 | char *p = dupsrc; |
48 | AVBPrint *dst_content = &sami->encoded_content; |
49 | AVBPrint *dst_source = &sami->encoded_source; |
50 | |
51 | av_bprint_clear(&sami->encoded_content); |
52 | av_bprint_clear(&sami->content); |
53 | av_bprint_clear(&sami->encoded_source); |
54 | for (;;) { |
55 | char *saveptr = NULL; |
56 | int prev_chr_is_space = 0; |
57 | AVBPrint *dst = &sami->content; |
58 | |
59 | /* parse & extract paragraph tag */ |
60 | p = av_stristr(p, "<P"); |
61 | if (!p) |
62 | break; |
63 | if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE> |
64 | p++; |
65 | continue; |
66 | } |
67 | if (dst->len) // add a separator with the previous paragraph if there was one |
68 | av_bprintf(dst, "\\N"); |
69 | tag = av_strtok(p, ">", &saveptr); |
70 | if (!tag || !saveptr) |
71 | break; |
72 | p = saveptr; |
73 | |
74 | /* check if the current paragraph is the "source" (speaker name) */ |
75 | if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) { |
76 | dst = &sami->source; |
77 | av_bprint_clear(dst); |
78 | } |
79 | |
80 | /* if empty event -> skip subtitle */ |
81 | while (av_isspace(*p)) |
82 | p++; |
83 | if (!strncmp(p, " ", 6)) { |
84 | ret = -1; |
85 | goto end; |
86 | } |
87 | |
88 | /* extract the text, stripping most of the tags */ |
89 | while (*p) { |
90 | if (*p == '<') { |
91 | if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2]))) |
92 | break; |
93 | } |
94 | if (!av_strncasecmp(p, "<BR", 3)) { |
95 | av_bprintf(dst, "\\N"); |
96 | p++; |
97 | while (*p && *p != '>') |
98 | p++; |
99 | if (!*p) |
100 | break; |
101 | if (*p == '>') |
102 | p++; |
103 | continue; |
104 | } |
105 | if (!av_isspace(*p)) |
106 | av_bprint_chars(dst, *p, 1); |
107 | else if (!prev_chr_is_space) |
108 | av_bprint_chars(dst, ' ', 1); |
109 | prev_chr_is_space = av_isspace(*p); |
110 | p++; |
111 | } |
112 | } |
113 | |
114 | av_bprint_clear(&sami->full); |
115 | if (sami->source.len) { |
116 | ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str); |
117 | av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str); |
118 | } |
119 | ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str); |
120 | av_bprintf(&sami->full, "%s", sami->encoded_content.str); |
121 | |
122 | end: |
123 | av_free(dupsrc); |
124 | return ret; |
125 | } |
126 | |
127 | static int sami_decode_frame(AVCodecContext *avctx, |
128 | void *data, int *got_sub_ptr, AVPacket *avpkt) |
129 | { |
130 | AVSubtitle *sub = data; |
131 | const char *ptr = avpkt->data; |
132 | SAMIContext *sami = avctx->priv_data; |
133 | |
134 | if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) { |
135 | // TODO: pass escaped sami->encoded_source.str as source |
136 | int ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL); |
137 | if (ret < 0) |
138 | return ret; |
139 | } |
140 | *got_sub_ptr = sub->num_rects > 0; |
141 | return avpkt->size; |
142 | } |
143 | |
144 | static av_cold int sami_init(AVCodecContext *avctx) |
145 | { |
146 | SAMIContext *sami = avctx->priv_data; |
147 | av_bprint_init(&sami->source, 0, 2048); |
148 | av_bprint_init(&sami->content, 0, 2048); |
149 | av_bprint_init(&sami->encoded_source, 0, 2048); |
150 | av_bprint_init(&sami->encoded_content, 0, 2048); |
151 | av_bprint_init(&sami->full, 0, 2048); |
152 | return ff_ass_subtitle_header_default(avctx); |
153 | } |
154 | |
155 | static av_cold int sami_close(AVCodecContext *avctx) |
156 | { |
157 | SAMIContext *sami = avctx->priv_data; |
158 | av_bprint_finalize(&sami->source, NULL); |
159 | av_bprint_finalize(&sami->content, NULL); |
160 | av_bprint_finalize(&sami->encoded_source, NULL); |
161 | av_bprint_finalize(&sami->encoded_content, NULL); |
162 | av_bprint_finalize(&sami->full, NULL); |
163 | return 0; |
164 | } |
165 | |
166 | static void sami_flush(AVCodecContext *avctx) |
167 | { |
168 | SAMIContext *sami = avctx->priv_data; |
169 | if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP)) |
170 | sami->readorder = 0; |
171 | } |
172 | |
173 | AVCodec ff_sami_decoder = { |
174 | .name = "sami", |
175 | .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"), |
176 | .type = AVMEDIA_TYPE_SUBTITLE, |
177 | .id = AV_CODEC_ID_SAMI, |
178 | .priv_data_size = sizeof(SAMIContext), |
179 | .init = sami_init, |
180 | .close = sami_close, |
181 | .decode = sami_decode_frame, |
182 | .flush = sami_flush, |
183 | }; |
184 |