blob: 12f74b22a08ef0e1a1fef2f840a03891d4b1bbef
1 | /* |
2 | * LRC lyrics file format decoder |
3 | * Copyright (c) 2014 StarBrilliant <m13253@hotmail.com> |
4 | * |
5 | * This file is part of FFmpeg. |
6 | * |
7 | * FFmpeg is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Lesser General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2.1 of the License, or (at your option) any later version. |
11 | * |
12 | * FFmpeg is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Lesser General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Lesser General Public |
18 | * License along with FFmpeg; if not, write to the Free Software |
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | */ |
21 | |
22 | #include <inttypes.h> |
23 | #include <stdint.h> |
24 | #include <string.h> |
25 | |
26 | #include "avformat.h" |
27 | #include "internal.h" |
28 | #include "lrc.h" |
29 | #include "metadata.h" |
30 | #include "subtitles.h" |
31 | #include "libavutil/bprint.h" |
32 | #include "libavutil/dict.h" |
33 | |
34 | typedef struct LRCContext { |
35 | FFDemuxSubtitlesQueue q; |
36 | int64_t ts_offset; // offset metadata item |
37 | } LRCContext; |
38 | |
/**
 * Check whether a line looks like a metadata header such as "[ar:Artist]".
 *
 * Leading spaces and tabs are skipped. The line counts as a header when the
 * first remaining character is '[' and the one after it is a lowercase
 * ASCII letter.
 *
 * @param p NUL-terminated line
 * @return index of the opening '[' within p, or -1 if this is not a header
 */
static int64_t find_header(const char *p)
{
    const char *cur = p;

    while (*cur == ' ' || *cur == '\t') {
        cur++;
    }
    if (cur[0] != '[') {
        return -1;
    }
    /* cur[1] is safe to read: cur[0] is '[' so the terminator is further on. */
    if (cur[1] < 'a' || cur[1] > 'z') {
        return -1;
    }
    return cur - p;
}
51 | |
/**
 * Measure the length of the timestamp prefix of a line.
 *
 * The prefix consists of spaces, tabs and '[' anywhere, plus ']' and the
 * characters ':' '.' '-' '0'..'9' while at least one '[' is still open.
 * Scanning stops at the first character that does not fit, including the
 * terminating NUL.
 *
 * @param p NUL-terminated line
 * @return number of leading characters that belong to the timestamp prefix
 */
static int64_t count_ts(const char *p)
{
    const char *cur = p;
    int depth = 0; /* number of '[' not yet closed by ']' */

    for (;; cur++) {
        const char c = *cur;

        if (c == ' ' || c == '\t') {
            continue;
        }
        if (c == '[') {
            depth++;
            continue;
        }
        /* Everything below is only accepted inside brackets. */
        if (!depth) {
            break;
        }
        if (c == ']') {
            depth--;
            continue;
        }
        if (c == ':' || c == '.' || c == '-' || (c >= '0' && c <= '9')) {
            continue;
        }
        break;
    }
    return cur - p;
}
76 | |
/**
 * Parse one "[mm:ss.cc]" or "[-mm:ss.cc]" timestamp tag.
 *
 * Leading spaces and tabs before the tag are skipped, so several tags
 * separated by blanks ("[00:01.00] [00:02.00]text") can be read by calling
 * this function repeatedly on the remainder of the line.
 *
 * @param p     NUL-terminated text positioned at (or before the blanks
 *              preceding) a timestamp tag
 * @param start receives mm*60000 + ss*1000 + cc*10, negated for the
 *              "[-...]" form; left untouched when nothing is parsed
 * @return number of characters consumed from p (blanks plus the tag up to
 *         and including ']'), or 0 if no timestamp tag was found
 */
static int64_t read_ts(const char *p, int64_t *start)
{
    int64_t offset = 0;
    uint64_t mm, ss, cs;
    const char *tag;

    while (p[offset] == ' ' || p[offset] == '\t') {
        offset++;
    }
    if (p[offset] != '[') {
        return 0;
    }
    /*
     * Parse from the bracket, not from p: a literal '[' in a scanf format
     * does not skip whitespace, so scanning p itself would reject every tag
     * that is preceded by blanks.
     */
    tag = p + offset;
    if (sscanf(tag, "[-%" SCNu64 ":%" SCNu64 ".%" SCNu64 "]", &mm, &ss, &cs) == 3) {
        /* Just in case negative pts, players may drop it but we won't. */
        *start = -(int64_t) (mm*60000 + ss*1000 + cs*10);
    } else if (sscanf(tag, "[%" SCNu64 ":%" SCNu64 ".%" SCNu64 "]", &mm, &ss, &cs) == 3) {
        *start = mm*60000 + ss*1000 + cs*10;
    } else {
        return 0;
    }
    /* Advance just past the closing ']' (or to the end of the string). */
    do {
        offset++;
    } while (p[offset] && p[offset-1] != ']');
    return offset;
}
101 | |
102 | static int64_t read_line(AVBPrint *buf, AVIOContext *pb) |
103 | { |
104 | int64_t pos = avio_tell(pb); |
105 | |
106 | av_bprint_clear(buf); |
107 | while(!avio_feof(pb)) { |
108 | int c = avio_r8(pb); |
109 | if(c != '\r') { |
110 | av_bprint_chars(buf, c, 1); |
111 | } |
112 | if(c == '\n') { |
113 | break; |
114 | } |
115 | } |
116 | return pos; |
117 | } |
118 | |
119 | static int lrc_probe(AVProbeData *p) |
120 | { |
121 | int64_t offset = 0; |
122 | int64_t mm; |
123 | uint64_t ss, cs; |
124 | const AVMetadataConv *metadata_item; |
125 | |
126 | if(!memcmp(p->buf, "\xef\xbb\xbf", 3)) { // Skip UTF-8 BOM header |
127 | offset += 3; |
128 | } |
129 | while(p->buf[offset] == '\n' || p->buf[offset] == '\r') { |
130 | offset++; |
131 | } |
132 | if(p->buf[offset] != '[') { |
133 | return 0; |
134 | } |
135 | offset++; |
136 | // Common metadata item but not exist in ff_lrc_metadata_conv |
137 | if(!memcmp(p->buf + offset, "offset:", 7)) { |
138 | return 40; |
139 | } |
140 | if(sscanf(p->buf + offset, "%"SCNd64":%"SCNu64".%"SCNu64"]", |
141 | &mm, &ss, &cs) == 3) { |
142 | return 50; |
143 | } |
144 | // Metadata items exist in ff_lrc_metadata_conv |
145 | for(metadata_item = ff_lrc_metadata_conv; |
146 | metadata_item->native; metadata_item++) { |
147 | size_t metadata_item_len = strlen(metadata_item->native); |
148 | if(p->buf[offset + metadata_item_len] == ':' && |
149 | !memcmp(p->buf + offset, metadata_item->native, metadata_item_len)) { |
150 | return 40; |
151 | } |
152 | } |
153 | return 5; // Give it 5 scores since it starts with a bracket |
154 | } |
155 | |
156 | static int lrc_read_header(AVFormatContext *s) |
157 | { |
158 | LRCContext *lrc = s->priv_data; |
159 | AVBPrint line; |
160 | AVStream *st; |
161 | |
162 | st = avformat_new_stream(s, NULL); |
163 | if(!st) { |
164 | return AVERROR(ENOMEM); |
165 | } |
166 | avpriv_set_pts_info(st, 64, 1, 1000); |
167 | lrc->ts_offset = 0; |
168 | st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE; |
169 | st->codecpar->codec_id = AV_CODEC_ID_TEXT; |
170 | av_bprint_init(&line, 0, AV_BPRINT_SIZE_UNLIMITED); |
171 | |
172 | while(!avio_feof(s->pb)) { |
173 | int64_t pos = read_line(&line, s->pb); |
174 | int64_t header_offset = find_header(line.str); |
175 | if(header_offset >= 0) { |
176 | char *comma_offset = strchr(line.str, ':'); |
177 | if(comma_offset) { |
178 | char *right_bracket_offset = strchr(line.str, ']'); |
179 | if(!right_bracket_offset) { |
180 | continue; |
181 | } |
182 | |
183 | *right_bracket_offset = *comma_offset = '\0'; |
184 | if(strcmp(line.str + 1, "offset") || |
185 | sscanf(comma_offset + 1, "%"SCNd64, &lrc->ts_offset) != 1) { |
186 | av_dict_set(&s->metadata, line.str + 1, comma_offset + 1, 0); |
187 | } |
188 | *comma_offset = ':'; |
189 | *right_bracket_offset = ']'; |
190 | } |
191 | |
192 | } else { |
193 | AVPacket *sub; |
194 | int64_t ts_start = AV_NOPTS_VALUE; |
195 | int64_t ts_stroffset = 0; |
196 | int64_t ts_stroffset_incr = 0; |
197 | int64_t ts_strlength = count_ts(line.str); |
198 | |
199 | while((ts_stroffset_incr = read_ts(line.str + ts_stroffset, |
200 | &ts_start)) != 0) { |
201 | ts_stroffset += ts_stroffset_incr; |
202 | sub = ff_subtitles_queue_insert(&lrc->q, line.str + ts_strlength, |
203 | line.len - ts_strlength, 0); |
204 | if(!sub) { |
205 | return AVERROR(ENOMEM); |
206 | } |
207 | sub->pos = pos; |
208 | sub->pts = ts_start - lrc->ts_offset; |
209 | sub->duration = -1; |
210 | } |
211 | } |
212 | } |
213 | ff_subtitles_queue_finalize(s, &lrc->q); |
214 | ff_metadata_conv_ctx(s, NULL, ff_lrc_metadata_conv); |
215 | return 0; |
216 | } |
217 | |
218 | static int lrc_read_packet(AVFormatContext *s, AVPacket *pkt) |
219 | { |
220 | LRCContext *lrc = s->priv_data; |
221 | return ff_subtitles_queue_read_packet(&lrc->q, pkt); |
222 | } |
223 | |
224 | static int lrc_read_seek(AVFormatContext *s, int stream_index, |
225 | int64_t min_ts, int64_t ts, int64_t max_ts, int flags) |
226 | { |
227 | LRCContext *lrc = s->priv_data; |
228 | return ff_subtitles_queue_seek(&lrc->q, s, stream_index, |
229 | min_ts, ts, max_ts, flags); |
230 | } |
231 | |
232 | static int lrc_read_close(AVFormatContext *s) |
233 | { |
234 | LRCContext *lrc = s->priv_data; |
235 | ff_subtitles_queue_clean(&lrc->q); |
236 | return 0; |
237 | } |
238 | |
239 | AVInputFormat ff_lrc_demuxer = { |
240 | .name = "lrc", |
241 | .long_name = NULL_IF_CONFIG_SMALL("LRC lyrics"), |
242 | .priv_data_size = sizeof (LRCContext), |
243 | .read_probe = lrc_probe, |
244 | .read_header = lrc_read_header, |
245 | .read_packet = lrc_read_packet, |
246 | .read_close = lrc_read_close, |
247 | .read_seek2 = lrc_read_seek |
248 | }; |
249 |