blob: fae55e8f23b022467311b69ae10c3c7c58f0fedb
1 | /* |
2 | * Copyright (c) 2006 Paul Richards <paul.richards@gmail.com> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | /** |
22 | * @file |
23 | * @brief Theora encoder using libtheora. |
24 | * @author Paul Richards <paul.richards@gmail.com> |
25 | * |
26 | * A lot of this is copy / paste from other output codecs in |
27 | * libavcodec or pure guesswork (or both). |
28 | * |
29 | * I have used t_ prefixes on variables which are libtheora types |
30 | * and o_ prefixes on variables which are libogg types. |
31 | */ |
32 | |
33 | /* FFmpeg includes */ |
34 | #include "libavutil/common.h" |
35 | #include "libavutil/intreadwrite.h" |
36 | #include "libavutil/pixdesc.h" |
37 | #include "libavutil/log.h" |
38 | #include "libavutil/base64.h" |
39 | #include "avcodec.h" |
40 | #include "internal.h" |
41 | |
42 | /* libtheora includes */ |
43 | #include <theora/theoraenc.h> |
44 | |
45 | typedef struct TheoraContext { |
46 | th_enc_ctx *t_state; |
47 | uint8_t *stats; |
48 | int stats_size; |
49 | int stats_offset; |
50 | int uv_hshift; |
51 | int uv_vshift; |
52 | int keyframe_mask; |
53 | } TheoraContext; |
54 | |
55 | /** Concatenate an ogg_packet into the extradata. */ |
56 | static int concatenate_packet(unsigned int* offset, |
57 | AVCodecContext* avc_context, |
58 | const ogg_packet* packet) |
59 | { |
60 | const char* message = NULL; |
61 | int newsize = avc_context->extradata_size + 2 + packet->bytes; |
62 | int err = AVERROR_INVALIDDATA; |
63 | |
64 | if (packet->bytes < 0) { |
65 | message = "ogg_packet has negative size"; |
66 | } else if (packet->bytes > 0xffff) { |
67 | message = "ogg_packet is larger than 65535 bytes"; |
68 | } else if (newsize < avc_context->extradata_size) { |
69 | message = "extradata_size would overflow"; |
70 | } else { |
71 | if ((err = av_reallocp(&avc_context->extradata, newsize)) < 0) { |
72 | avc_context->extradata_size = 0; |
73 | message = "av_realloc failed"; |
74 | } |
75 | } |
76 | if (message) { |
77 | av_log(avc_context, AV_LOG_ERROR, "concatenate_packet failed: %s\n", message); |
78 | return err; |
79 | } |
80 | |
81 | avc_context->extradata_size = newsize; |
82 | AV_WB16(avc_context->extradata + (*offset), packet->bytes); |
83 | *offset += 2; |
84 | memcpy(avc_context->extradata + (*offset), packet->packet, packet->bytes); |
85 | (*offset) += packet->bytes; |
86 | return 0; |
87 | } |
88 | |
89 | static int get_stats(AVCodecContext *avctx, int eos) |
90 | { |
91 | #ifdef TH_ENCCTL_2PASS_OUT |
92 | TheoraContext *h = avctx->priv_data; |
93 | uint8_t *buf; |
94 | int bytes; |
95 | |
96 | bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_OUT, &buf, sizeof(buf)); |
97 | if (bytes < 0) { |
98 | av_log(avctx, AV_LOG_ERROR, "Error getting first pass stats\n"); |
99 | return AVERROR_EXTERNAL; |
100 | } |
101 | if (!eos) { |
102 | void *tmp = av_fast_realloc(h->stats, &h->stats_size, |
103 | h->stats_offset + bytes); |
104 | if (!tmp) |
105 | return AVERROR(ENOMEM); |
106 | h->stats = tmp; |
107 | memcpy(h->stats + h->stats_offset, buf, bytes); |
108 | h->stats_offset += bytes; |
109 | } else { |
110 | int b64_size = AV_BASE64_SIZE(h->stats_offset); |
111 | // libtheora generates a summary header at the end |
112 | memcpy(h->stats, buf, bytes); |
113 | avctx->stats_out = av_malloc(b64_size); |
114 | if (!avctx->stats_out) |
115 | return AVERROR(ENOMEM); |
116 | av_base64_encode(avctx->stats_out, b64_size, h->stats, h->stats_offset); |
117 | } |
118 | return 0; |
119 | #else |
120 | av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n"); |
121 | return AVERROR(ENOSUP); |
122 | #endif |
123 | } |
124 | |
125 | // libtheora won't read the entire buffer we give it at once, so we have to |
126 | // repeatedly submit it... |
127 | static int submit_stats(AVCodecContext *avctx) |
128 | { |
129 | #ifdef TH_ENCCTL_2PASS_IN |
130 | TheoraContext *h = avctx->priv_data; |
131 | int bytes; |
132 | if (!h->stats) { |
133 | if (!avctx->stats_in) { |
134 | av_log(avctx, AV_LOG_ERROR, "No statsfile for second pass\n"); |
135 | return AVERROR(EINVAL); |
136 | } |
137 | h->stats_size = strlen(avctx->stats_in) * 3/4; |
138 | h->stats = av_malloc(h->stats_size); |
139 | if (!h->stats) { |
140 | h->stats_size = 0; |
141 | return AVERROR(ENOMEM); |
142 | } |
143 | h->stats_size = av_base64_decode(h->stats, avctx->stats_in, h->stats_size); |
144 | } |
145 | while (h->stats_size - h->stats_offset > 0) { |
146 | bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_IN, |
147 | h->stats + h->stats_offset, |
148 | h->stats_size - h->stats_offset); |
149 | if (bytes < 0) { |
150 | av_log(avctx, AV_LOG_ERROR, "Error submitting stats\n"); |
151 | return AVERROR_EXTERNAL; |
152 | } |
153 | if (!bytes) |
154 | return 0; |
155 | h->stats_offset += bytes; |
156 | } |
157 | return 0; |
158 | #else |
159 | av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n"); |
160 | return AVERROR(ENOSUP); |
161 | #endif |
162 | } |
163 | |
164 | static av_cold int encode_init(AVCodecContext* avc_context) |
165 | { |
166 | th_info t_info; |
167 | th_comment t_comment; |
168 | ogg_packet o_packet; |
169 | unsigned int offset; |
170 | TheoraContext *h = avc_context->priv_data; |
171 | uint32_t gop_size = avc_context->gop_size; |
172 | int ret; |
173 | |
174 | /* Set up the theora_info struct */ |
175 | th_info_init(&t_info); |
176 | t_info.frame_width = FFALIGN(avc_context->width, 16); |
177 | t_info.frame_height = FFALIGN(avc_context->height, 16); |
178 | t_info.pic_width = avc_context->width; |
179 | t_info.pic_height = avc_context->height; |
180 | t_info.pic_x = 0; |
181 | t_info.pic_y = 0; |
182 | /* Swap numerator and denominator as time_base in AVCodecContext gives the |
183 | * time period between frames, but theora_info needs the framerate. */ |
184 | t_info.fps_numerator = avc_context->time_base.den; |
185 | t_info.fps_denominator = avc_context->time_base.num; |
186 | if (avc_context->sample_aspect_ratio.num) { |
187 | t_info.aspect_numerator = avc_context->sample_aspect_ratio.num; |
188 | t_info.aspect_denominator = avc_context->sample_aspect_ratio.den; |
189 | } else { |
190 | t_info.aspect_numerator = 1; |
191 | t_info.aspect_denominator = 1; |
192 | } |
193 | |
194 | if (avc_context->color_primaries == AVCOL_PRI_BT470M) |
195 | t_info.colorspace = TH_CS_ITU_REC_470M; |
196 | else if (avc_context->color_primaries == AVCOL_PRI_BT470BG) |
197 | t_info.colorspace = TH_CS_ITU_REC_470BG; |
198 | else |
199 | t_info.colorspace = TH_CS_UNSPECIFIED; |
200 | |
201 | if (avc_context->pix_fmt == AV_PIX_FMT_YUV420P) |
202 | t_info.pixel_fmt = TH_PF_420; |
203 | else if (avc_context->pix_fmt == AV_PIX_FMT_YUV422P) |
204 | t_info.pixel_fmt = TH_PF_422; |
205 | else if (avc_context->pix_fmt == AV_PIX_FMT_YUV444P) |
206 | t_info.pixel_fmt = TH_PF_444; |
207 | else { |
208 | av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n"); |
209 | return AVERROR(EINVAL); |
210 | } |
211 | avcodec_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift); |
212 | |
213 | if (avc_context->flags & AV_CODEC_FLAG_QSCALE) { |
214 | /* Clip global_quality in QP units to the [0 - 10] range |
215 | to be consistent with the libvorbis implementation. |
216 | Theora accepts a quality parameter which is an int value in |
217 | the [0 - 63] range. |
218 | */ |
219 | t_info.quality = av_clipf(avc_context->global_quality / (float)FF_QP2LAMBDA, 0, 10) * 6.3; |
220 | t_info.target_bitrate = 0; |
221 | } else { |
222 | t_info.target_bitrate = avc_context->bit_rate; |
223 | t_info.quality = 0; |
224 | } |
225 | |
226 | /* Now initialise libtheora */ |
227 | h->t_state = th_encode_alloc(&t_info); |
228 | if (!h->t_state) { |
229 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_init failed\n"); |
230 | return AVERROR_EXTERNAL; |
231 | } |
232 | |
233 | h->keyframe_mask = (1 << t_info.keyframe_granule_shift) - 1; |
234 | /* Clear up theora_info struct */ |
235 | th_info_clear(&t_info); |
236 | |
237 | if (th_encode_ctl(h->t_state, TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, |
238 | &gop_size, sizeof(gop_size))) { |
239 | av_log(avc_context, AV_LOG_ERROR, "Error setting GOP size\n"); |
240 | return AVERROR_EXTERNAL; |
241 | } |
242 | |
243 | // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers |
244 | if (avc_context->flags & AV_CODEC_FLAG_PASS1) { |
245 | if ((ret = get_stats(avc_context, 0)) < 0) |
246 | return ret; |
247 | } else if (avc_context->flags & AV_CODEC_FLAG_PASS2) { |
248 | if ((ret = submit_stats(avc_context)) < 0) |
249 | return ret; |
250 | } |
251 | |
252 | /* |
253 | Output first header packet consisting of theora |
254 | header, comment, and tables. |
255 | |
256 | Each one is prefixed with a 16-bit size, then they |
257 | are concatenated together into libavcodec's extradata. |
258 | */ |
259 | offset = 0; |
260 | |
261 | /* Headers */ |
262 | th_comment_init(&t_comment); |
263 | |
264 | while (th_encode_flushheader(h->t_state, &t_comment, &o_packet)) |
265 | if ((ret = concatenate_packet(&offset, avc_context, &o_packet)) < 0) |
266 | return ret; |
267 | |
268 | th_comment_clear(&t_comment); |
269 | |
270 | return 0; |
271 | } |
272 | |
273 | static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt, |
274 | const AVFrame *frame, int *got_packet) |
275 | { |
276 | th_ycbcr_buffer t_yuv_buffer; |
277 | TheoraContext *h = avc_context->priv_data; |
278 | ogg_packet o_packet; |
279 | int result, i, ret; |
280 | |
281 | // EOS, finish and get 1st pass stats if applicable |
282 | if (!frame) { |
283 | th_encode_packetout(h->t_state, 1, &o_packet); |
284 | if (avc_context->flags & AV_CODEC_FLAG_PASS1) |
285 | if ((ret = get_stats(avc_context, 1)) < 0) |
286 | return ret; |
287 | return 0; |
288 | } |
289 | |
290 | /* Copy planes to the theora yuv_buffer */ |
291 | for (i = 0; i < 3; i++) { |
292 | t_yuv_buffer[i].width = FFALIGN(avc_context->width, 16) >> (i && h->uv_hshift); |
293 | t_yuv_buffer[i].height = FFALIGN(avc_context->height, 16) >> (i && h->uv_vshift); |
294 | t_yuv_buffer[i].stride = frame->linesize[i]; |
295 | t_yuv_buffer[i].data = frame->data[i]; |
296 | } |
297 | |
298 | if (avc_context->flags & AV_CODEC_FLAG_PASS2) |
299 | if ((ret = submit_stats(avc_context)) < 0) |
300 | return ret; |
301 | |
302 | /* Now call into theora_encode_YUVin */ |
303 | result = th_encode_ycbcr_in(h->t_state, t_yuv_buffer); |
304 | if (result) { |
305 | const char* message; |
306 | switch (result) { |
307 | case -1: |
308 | message = "differing frame sizes"; |
309 | break; |
310 | case TH_EINVAL: |
311 | message = "encoder is not ready or is finished"; |
312 | break; |
313 | default: |
314 | message = "unknown reason"; |
315 | break; |
316 | } |
317 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_YUVin failed (%s) [%d]\n", message, result); |
318 | return AVERROR_EXTERNAL; |
319 | } |
320 | |
321 | if (avc_context->flags & AV_CODEC_FLAG_PASS1) |
322 | if ((ret = get_stats(avc_context, 0)) < 0) |
323 | return ret; |
324 | |
325 | /* Pick up returned ogg_packet */ |
326 | result = th_encode_packetout(h->t_state, 0, &o_packet); |
327 | switch (result) { |
328 | case 0: |
329 | /* No packet is ready */ |
330 | return 0; |
331 | case 1: |
332 | /* Success, we have a packet */ |
333 | break; |
334 | default: |
335 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_packetout failed [%d]\n", result); |
336 | return AVERROR_EXTERNAL; |
337 | } |
338 | |
339 | /* Copy ogg_packet content out to buffer */ |
340 | if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes, 0)) < 0) |
341 | return ret; |
342 | memcpy(pkt->data, o_packet.packet, o_packet.bytes); |
343 | |
344 | // HACK: assumes no encoder delay, this is true until libtheora becomes |
345 | // multithreaded (which will be disabled unless explicitly requested) |
346 | pkt->pts = pkt->dts = frame->pts; |
347 | #if FF_API_CODED_FRAME |
348 | FF_DISABLE_DEPRECATION_WARNINGS |
349 | avc_context->coded_frame->key_frame = !(o_packet.granulepos & h->keyframe_mask); |
350 | FF_ENABLE_DEPRECATION_WARNINGS |
351 | #endif |
352 | if (!(o_packet.granulepos & h->keyframe_mask)) |
353 | pkt->flags |= AV_PKT_FLAG_KEY; |
354 | *got_packet = 1; |
355 | |
356 | return 0; |
357 | } |
358 | |
359 | static av_cold int encode_close(AVCodecContext* avc_context) |
360 | { |
361 | TheoraContext *h = avc_context->priv_data; |
362 | |
363 | th_encode_free(h->t_state); |
364 | av_freep(&h->stats); |
365 | av_freep(&avc_context->stats_out); |
366 | av_freep(&avc_context->extradata); |
367 | avc_context->extradata_size = 0; |
368 | |
369 | return 0; |
370 | } |
371 | |
372 | /** AVCodec struct exposed to libavcodec */ |
373 | AVCodec ff_libtheora_encoder = { |
374 | .name = "libtheora", |
375 | .long_name = NULL_IF_CONFIG_SMALL("libtheora Theora"), |
376 | .type = AVMEDIA_TYPE_VIDEO, |
377 | .id = AV_CODEC_ID_THEORA, |
378 | .priv_data_size = sizeof(TheoraContext), |
379 | .init = encode_init, |
380 | .close = encode_close, |
381 | .encode2 = encode_frame, |
382 | .capabilities = AV_CODEC_CAP_DELAY, // needed to get the statsfile summary |
383 | .pix_fmts = (const enum AVPixelFormat[]){ |
384 | AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE |
385 | }, |
386 | }; |
387 |