blob: ca78db224d52f26fa4e8d7dffce68b05df579594
1 | /* |
2 | * Copyright (c) 2012 Clément Bœsch |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #ifndef AVFORMAT_SUBTITLES_H |
22 | #define AVFORMAT_SUBTITLES_H |
23 | |
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "avformat.h"
#include "libavutil/bprint.h"
28 | |
/**
 * Ordering criteria available for queued subtitle events.
 */
enum sub_sort {
    SUB_SORT_TS_POS = 0,    /**< order by timestamps first, then by position */
    SUB_SORT_POS_TS,        /**< order by position first, then by timestamps */
};
33 | |
/**
 * Text encodings distinguished by FFTextReader.
 */
enum ff_utf_type {
    FF_UTF_8,       /**< UTF-8, or any other 8 bit encoding */
    FF_UTF16LE,     /**< UTF-16, little endian */
    FF_UTF16BE,     /**< UTF-16, big endian */
};
39 | |
40 | typedef struct { |
41 | int type; |
42 | AVIOContext *pb; |
43 | unsigned char buf[8]; |
44 | int buf_pos, buf_len; |
45 | AVIOContext buf_pb; |
46 | } FFTextReader; |
47 | |
48 | /** |
49 | * Initialize the FFTextReader from the given AVIOContext. This function will |
50 | * read some bytes from pb, and test for UTF-8 or UTF-16 BOMs. Further accesses |
51 | * to FFTextReader will read more data from pb. |
52 | * If s is not NULL, the user will be warned if a UTF-16 conversion takes place. |
53 | * |
54 | * The purpose of FFTextReader is to transparently convert read data to UTF-8 |
55 | * if the stream had a UTF-16 BOM. |
56 | * |
57 | * @param s Pointer to provide av_log context |
58 | * @param r object which will be initialized |
59 | * @param pb stream to read from (referenced as long as FFTextReader is in use) |
60 | */ |
61 | void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb); |
62 | |
63 | /** |
64 | * Similar to ff_text_init_avio(), but sets it up to read from a bounded buffer. |
65 | * |
66 | * @param r object which will be initialized |
67 | * @param buf buffer to read from (referenced as long as FFTextReader is in use) |
68 | * @param size size of buf |
69 | */ |
70 | void ff_text_init_buf(FFTextReader *r, void *buf, size_t size); |
71 | |
72 | /** |
73 | * Return the byte position of the next byte returned by ff_text_r8(). For |
74 | * UTF-16 source streams, this will return the original position, but it will |
75 | * be incorrect if a codepoint was only partially read with ff_text_r8(). |
76 | */ |
77 | int64_t ff_text_pos(FFTextReader *r); |
78 | |
79 | /** |
80 | * Return the next byte. The return value is always 0 - 255. Returns 0 on EOF. |
81 | * If the source stream is UTF-16, this reads from the stream converted to |
82 | * UTF-8. On invalid UTF-16, 0 is returned. |
83 | */ |
84 | int ff_text_r8(FFTextReader *r); |
85 | |
86 | /** |
87 | * Return non-zero if EOF was reached. |
88 | */ |
89 | int ff_text_eof(FFTextReader *r); |
90 | |
91 | /** |
92 | * Like ff_text_r8(), but don't remove the byte from the buffer. |
93 | */ |
94 | int ff_text_peek_r8(FFTextReader *r); |
95 | |
96 | /** |
97 | * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are |
98 | * written. |
99 | */ |
100 | void ff_text_read(FFTextReader *r, char *buf, size_t size); |
101 | |
102 | typedef struct { |
103 | AVPacket *subs; ///< array of subtitles packets |
104 | int nb_subs; ///< number of subtitles packets |
105 | int allocated_size; ///< allocated size for subs |
106 | int current_sub_idx; ///< current position for the read packet callback |
107 | enum sub_sort sort; ///< sort method to use when finalizing subtitles |
108 | int keep_duplicates; ///< set to 1 to keep duplicated subtitle events |
109 | } FFDemuxSubtitlesQueue; |
110 | |
111 | /** |
112 | * Insert a new subtitle event. |
113 | * |
114 | * @param event the subtitle line, may not be zero terminated |
115 | * @param len the length of the event (in strlen() sense, so without '\0') |
116 | * @param merge set to 1 if the current event should be concatenated with the |
117 | * previous one instead of adding a new entry, 0 otherwise |
118 | */ |
119 | AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, |
120 | const uint8_t *event, size_t len, int merge); |
121 | |
122 | /** |
123 | * Set missing durations, sort subtitles by PTS (and then byte position), and |
124 | * drop duplicated events. |
125 | */ |
126 | void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q); |
127 | |
128 | /** |
129 | * Generic read_packet() callback for subtitles demuxers using this queue |
130 | * system. |
131 | */ |
132 | int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt); |
133 | |
134 | /** |
135 | * Update current_sub_idx to emulate a seek. Except the first parameter, it |
136 | * matches AVInputFormat->read_seek2 prototypes. |
137 | */ |
138 | int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, |
139 | int64_t min_ts, int64_t ts, int64_t max_ts, int flags); |
140 | |
141 | /** |
142 | * Remove and destroy all the subtitles packets. |
143 | */ |
144 | void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q); |
145 | |
146 | /** |
147 | * SMIL helper to load next chunk ("<...>" or untagged content) in buf. |
148 | * |
149 | * @param c cached character, to avoid a backward seek |
150 | */ |
151 | int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c); |
152 | |
/**
 * SMIL helper that points to the value of an attribute inside the given tag.
 *
 * @param s    SMIL tag ("<...>")
 * @param attr name of the attribute to look for
 */
const char *ff_smil_get_attr_ptr(const char *s,
                                 const char *attr);
160 | |
161 | /** |
162 | * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext. |
163 | */ |
164 | void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf); |
165 | |
166 | /** |
167 | * @brief Read a subtitles chunk from FFTextReader. |
168 | * |
169 | * A chunk is defined by a multiline "event", ending with a second line break. |
170 | * The trailing line breaks are trimmed. CRLF are supported. |
171 | * Example: "foo\r\nbar\r\n\r\nnext" will print "foo\r\nbar" into buf, and pb |
172 | * will focus on the 'n' of the "next" string. |
173 | * |
174 | * @param tr I/O context |
175 | * @param buf an initialized buf where the chunk is written |
176 | * |
177 | * @note buf is cleared before writing into it. |
178 | */ |
179 | void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf); |
180 | |
/**
 * Get the number of characters to skip in order to reach the next line, or
 * the end of the string when there is no further line break.
 *
 * The following line break schemes are handled:
 * LF, CRLF (MS), or standalone CR (old MacOS).
 *
 * @param ptr zero-terminated string to scan; must not be NULL
 * @return length of the current line including its line break sequence
 *         (0 for an empty string)
 *
 * @note The result is an int, so lines longer than INT_MAX cannot be
 *       represented.
 */
static av_always_inline int ff_subtitles_next_line(const char *ptr)
{
    /* Length of the line content: everything before the first CR/LF, or
     * before the terminating '\0' if there is no line break. The size_t
     * result is narrowed explicitly to match the int return type. */
    int n = (int)strcspn(ptr, "\r\n");

    ptr += n;
    /* An optional CR followed by an optional LF counts as one line break,
     * which covers standalone CR, standalone LF and CRLF. */
    if (*ptr == '\r') {
        ptr++;
        n++;
    }
    if (*ptr == '\n')
        n++;
    return n;
}
199 | |
200 | /** |
201 | * Read a line of text. Discards line ending characters. |
202 | * The function handles the following line breaks schemes: |
203 | * LF, CRLF (MS), or standalone CR (old MacOS). |
204 | * |
205 | * Returns the number of bytes written to buf. Always writes a terminating 0, |
206 | * similar as with snprintf. |
207 | * |
208 | * @note returns a negative error code if a \0 byte is found |
209 | */ |
210 | ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size); |
211 | |
212 | #endif /* AVFORMAT_SUBTITLES_H */ |
213 |