blob: dac0a2d19e5d7444d9033a2499a0b3612f1f887b
1 | /* |
2 | * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with FFmpeg; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
/* Default value of the "input" option: the transforms file that is read
 * when the user does not name one explicitly. */
#define DEFAULT_INPUT_NAME "transforms.trf"
22 | |
23 | #include <vid.stab/libvidstab.h> |
24 | |
25 | #include "libavutil/common.h" |
26 | #include "libavutil/opt.h" |
27 | #include "libavutil/imgutils.h" |
28 | #include "avfilter.h" |
29 | #include "internal.h" |
30 | |
31 | #include "vidstabutils.h" |
32 | |
33 | typedef struct { |
34 | const AVClass *class; |
35 | |
36 | VSTransformData td; |
37 | VSTransformConfig conf; |
38 | |
39 | VSTransformations trans; // transformations |
40 | char *input; // name of transform file |
41 | int tripod; |
42 | int debug; |
43 | } TransformContext; |
44 | |
/* Byte offset of a member that lives directly in TransformContext. */
#define OFFSET(x) offsetof(TransformContext, x)
/* Byte offset of a VSTransformConfig member nested in TransformContext.conf. */
#define OFFSETC(x) (offsetof(TransformContext, conf)+offsetof(VSTransformConfig, x))
/* Option flags shared by every entry of the option table. The replacement
 * list is parenthesized so the macro expands as one operand in any context. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
48 | |
49 | static const AVOption vidstabtransform_options[] = { |
50 | {"input", "set path to the file storing the transforms", OFFSET(input), |
51 | AV_OPT_TYPE_STRING, {.str = DEFAULT_INPUT_NAME}, .flags = FLAGS }, |
52 | {"smoothing", "set number of frames*2 + 1 used for lowpass filtering", OFFSETC(smoothing), |
53 | AV_OPT_TYPE_INT, {.i64 = 15}, 0, 1000, FLAGS}, |
54 | |
55 | {"optalgo", "set camera path optimization algo", OFFSETC(camPathAlgo), |
56 | AV_OPT_TYPE_INT, {.i64 = VSOptimalL1}, VSOptimalL1, VSAvg, FLAGS, "optalgo"}, |
57 | { "opt", "global optimization", 0, // from version 1.0 on |
58 | AV_OPT_TYPE_CONST, {.i64 = VSOptimalL1 }, 0, 0, FLAGS, "optalgo"}, |
59 | { "gauss", "gaussian kernel", 0, |
60 | AV_OPT_TYPE_CONST, {.i64 = VSGaussian }, 0, 0, FLAGS, "optalgo"}, |
61 | { "avg", "simple averaging on motion", 0, |
62 | AV_OPT_TYPE_CONST, {.i64 = VSAvg }, 0, 0, FLAGS, "optalgo"}, |
63 | |
64 | {"maxshift", "set maximal number of pixels to translate image", OFFSETC(maxShift), |
65 | AV_OPT_TYPE_INT, {.i64 = -1}, -1, 500, FLAGS}, |
66 | {"maxangle", "set maximal angle in rad to rotate image", OFFSETC(maxAngle), |
67 | AV_OPT_TYPE_DOUBLE, {.dbl = -1.0}, -1.0, 3.14, FLAGS}, |
68 | |
69 | {"crop", "set cropping mode", OFFSETC(crop), |
70 | AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS, "crop"}, |
71 | { "keep", "keep border", 0, |
72 | AV_OPT_TYPE_CONST, {.i64 = VSKeepBorder }, 0, 0, FLAGS, "crop"}, |
73 | { "black", "black border", 0, |
74 | AV_OPT_TYPE_CONST, {.i64 = VSCropBorder }, 0, 0, FLAGS, "crop"}, |
75 | |
76 | {"invert", "invert transforms", OFFSETC(invert), |
77 | AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS}, |
78 | {"relative", "consider transforms as relative", OFFSETC(relative), |
79 | AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, FLAGS}, |
80 | {"zoom", "set percentage to zoom (>0: zoom in, <0: zoom out", OFFSETC(zoom), |
81 | AV_OPT_TYPE_DOUBLE, {.dbl = 0}, -100, 100, FLAGS}, |
82 | {"optzoom", "set optimal zoom (0: nothing, 1: optimal static zoom, 2: optimal dynamic zoom)", OFFSETC(optZoom), |
83 | AV_OPT_TYPE_INT, {.i64 = 1}, 0, 2, FLAGS}, |
84 | {"zoomspeed", "for adative zoom: percent to zoom maximally each frame", OFFSETC(zoomSpeed), |
85 | AV_OPT_TYPE_DOUBLE, {.dbl = 0.25}, 0, 5, FLAGS}, |
86 | |
87 | {"interpol", "set type of interpolation", OFFSETC(interpolType), |
88 | AV_OPT_TYPE_INT, {.i64 = 2}, 0, 3, FLAGS, "interpol"}, |
89 | { "no", "no interpolation", 0, |
90 | AV_OPT_TYPE_CONST, {.i64 = VS_Zero }, 0, 0, FLAGS, "interpol"}, |
91 | { "linear", "linear (horizontal)", 0, |
92 | AV_OPT_TYPE_CONST, {.i64 = VS_Linear }, 0, 0, FLAGS, "interpol"}, |
93 | { "bilinear","bi-linear", 0, |
94 | AV_OPT_TYPE_CONST, {.i64 = VS_BiLinear},0, 0, FLAGS, "interpol"}, |
95 | { "bicubic", "bi-cubic", 0, |
96 | AV_OPT_TYPE_CONST, {.i64 = VS_BiCubic },0, 0, FLAGS, "interpol"}, |
97 | |
98 | {"tripod", "enable virtual tripod mode (same as relative=0:smoothing=0)", OFFSET(tripod), |
99 | AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS}, |
100 | {"debug", "enable debug mode and writer global motions information to file", OFFSET(debug), |
101 | AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS}, |
102 | {NULL} |
103 | }; |
104 | |
105 | AVFILTER_DEFINE_CLASS(vidstabtransform); |
106 | |
107 | static av_cold int init(AVFilterContext *ctx) |
108 | { |
109 | TransformContext *tc = ctx->priv; |
110 | ff_vs_init(); |
111 | tc->class = &vidstabtransform_class; |
112 | av_log(ctx, AV_LOG_VERBOSE, "vidstabtransform filter: init %s\n", LIBVIDSTAB_VERSION); |
113 | return 0; |
114 | } |
115 | |
116 | static av_cold void uninit(AVFilterContext *ctx) |
117 | { |
118 | TransformContext *tc = ctx->priv; |
119 | |
120 | vsTransformDataCleanup(&tc->td); |
121 | vsTransformationsCleanup(&tc->trans); |
122 | } |
123 | |
124 | static int query_formats(AVFilterContext *ctx) |
125 | { |
126 | // If you add something here also add it in vidstabutils.c |
127 | static const enum AVPixelFormat pix_fmts[] = { |
128 | AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, |
129 | AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P, |
130 | AV_PIX_FMT_YUV440P, AV_PIX_FMT_GRAY8, |
131 | AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA, |
132 | AV_PIX_FMT_NONE |
133 | }; |
134 | |
135 | AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); |
136 | if (!fmts_list) |
137 | return AVERROR(ENOMEM); |
138 | return ff_set_common_formats(ctx, fmts_list); |
139 | } |
140 | |
141 | |
142 | static int config_input(AVFilterLink *inlink) |
143 | { |
144 | AVFilterContext *ctx = inlink->dst; |
145 | TransformContext *tc = ctx->priv; |
146 | FILE *f; |
147 | |
148 | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); |
149 | |
150 | VSTransformData *td = &(tc->td); |
151 | |
152 | VSFrameInfo fi_src; |
153 | VSFrameInfo fi_dest; |
154 | |
155 | if (!vsFrameInfoInit(&fi_src, inlink->w, inlink->h, |
156 | ff_av2vs_pixfmt(ctx, inlink->format)) || |
157 | !vsFrameInfoInit(&fi_dest, inlink->w, inlink->h, |
158 | ff_av2vs_pixfmt(ctx, inlink->format))) { |
159 | av_log(ctx, AV_LOG_ERROR, "unknown pixel format: %i (%s)", |
160 | inlink->format, desc->name); |
161 | return AVERROR(EINVAL); |
162 | } |
163 | |
164 | if (fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8 || |
165 | fi_src.log2ChromaW != desc->log2_chroma_w || |
166 | fi_src.log2ChromaH != desc->log2_chroma_h) { |
167 | av_log(ctx, AV_LOG_ERROR, "pixel-format error: bpp %i<>%i ", |
168 | fi_src.bytesPerPixel, av_get_bits_per_pixel(desc)/8); |
169 | av_log(ctx, AV_LOG_ERROR, "chroma_subsampl: w: %i<>%i h: %i<>%i\n", |
170 | fi_src.log2ChromaW, desc->log2_chroma_w, |
171 | fi_src.log2ChromaH, desc->log2_chroma_h); |
172 | return AVERROR(EINVAL); |
173 | } |
174 | |
175 | // set values that are not initializes by the options |
176 | tc->conf.modName = "vidstabtransform"; |
177 | tc->conf.verbose = 1 + tc->debug; |
178 | if (tc->tripod) { |
179 | av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0\n"); |
180 | tc->conf.relative = 0; |
181 | tc->conf.smoothing = 0; |
182 | } |
183 | tc->conf.simpleMotionCalculation = 0; |
184 | tc->conf.storeTransforms = tc->debug; |
185 | tc->conf.smoothZoom = 0; |
186 | |
187 | if (vsTransformDataInit(td, &tc->conf, &fi_src, &fi_dest) != VS_OK) { |
188 | av_log(ctx, AV_LOG_ERROR, "initialization of vid.stab transform failed, please report a BUG\n"); |
189 | return AVERROR(EINVAL); |
190 | } |
191 | |
192 | vsTransformGetConfig(&tc->conf, td); |
193 | av_log(ctx, AV_LOG_INFO, "Video transformation/stabilization settings (pass 2/2):\n"); |
194 | av_log(ctx, AV_LOG_INFO, " input = %s\n", tc->input); |
195 | av_log(ctx, AV_LOG_INFO, " smoothing = %d\n", tc->conf.smoothing); |
196 | av_log(ctx, AV_LOG_INFO, " optalgo = %s\n", |
197 | tc->conf.camPathAlgo == VSOptimalL1 ? "opt" : |
198 | (tc->conf.camPathAlgo == VSGaussian ? "gauss" : "avg")); |
199 | av_log(ctx, AV_LOG_INFO, " maxshift = %d\n", tc->conf.maxShift); |
200 | av_log(ctx, AV_LOG_INFO, " maxangle = %f\n", tc->conf.maxAngle); |
201 | av_log(ctx, AV_LOG_INFO, " crop = %s\n", tc->conf.crop ? "Black" : "Keep"); |
202 | av_log(ctx, AV_LOG_INFO, " relative = %s\n", tc->conf.relative ? "True": "False"); |
203 | av_log(ctx, AV_LOG_INFO, " invert = %s\n", tc->conf.invert ? "True" : "False"); |
204 | av_log(ctx, AV_LOG_INFO, " zoom = %f\n", tc->conf.zoom); |
205 | av_log(ctx, AV_LOG_INFO, " optzoom = %s\n", |
206 | tc->conf.optZoom == 1 ? "Static (1)" : (tc->conf.optZoom == 2 ? "Dynamic (2)" : "Off (0)")); |
207 | if (tc->conf.optZoom == 2) |
208 | av_log(ctx, AV_LOG_INFO, " zoomspeed = %g\n", tc->conf.zoomSpeed); |
209 | av_log(ctx, AV_LOG_INFO, " interpol = %s\n", getInterpolationTypeName(tc->conf.interpolType)); |
210 | |
211 | f = fopen(tc->input, "r"); |
212 | if (!f) { |
213 | int ret = AVERROR(errno); |
214 | av_log(ctx, AV_LOG_ERROR, "cannot open input file %s\n", tc->input); |
215 | return ret; |
216 | } else { |
217 | VSManyLocalMotions mlms; |
218 | if (vsReadLocalMotionsFile(f, &mlms) == VS_OK) { |
219 | // calculate the actual transforms from the local motions |
220 | if (vsLocalmotions2Transforms(td, &mlms, &tc->trans) != VS_OK) { |
221 | av_log(ctx, AV_LOG_ERROR, "calculating transformations failed\n"); |
222 | return AVERROR(EINVAL); |
223 | } |
224 | } else { // try to read old format |
225 | if (!vsReadOldTransforms(td, f, &tc->trans)) { /* read input file */ |
226 | av_log(ctx, AV_LOG_ERROR, "error parsing input file %s\n", tc->input); |
227 | return AVERROR(EINVAL); |
228 | } |
229 | } |
230 | } |
231 | fclose(f); |
232 | |
233 | if (vsPreprocessTransforms(td, &tc->trans) != VS_OK) { |
234 | av_log(ctx, AV_LOG_ERROR, "error while preprocessing transforms\n"); |
235 | return AVERROR(EINVAL); |
236 | } |
237 | |
238 | // TODO: add sharpening, so far the user needs to call the unsharp filter manually |
239 | return 0; |
240 | } |
241 | |
242 | |
243 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
244 | { |
245 | AVFilterContext *ctx = inlink->dst; |
246 | TransformContext *tc = ctx->priv; |
247 | VSTransformData* td = &(tc->td); |
248 | |
249 | AVFilterLink *outlink = inlink->dst->outputs[0]; |
250 | int direct = 0; |
251 | AVFrame *out; |
252 | VSFrame inframe; |
253 | int plane; |
254 | |
255 | if (av_frame_is_writable(in)) { |
256 | direct = 1; |
257 | out = in; |
258 | } else { |
259 | out = ff_get_video_buffer(outlink, outlink->w, outlink->h); |
260 | if (!out) { |
261 | av_frame_free(&in); |
262 | return AVERROR(ENOMEM); |
263 | } |
264 | av_frame_copy_props(out, in); |
265 | } |
266 | |
267 | for (plane = 0; plane < vsTransformGetSrcFrameInfo(td)->planes; plane++) { |
268 | inframe.data[plane] = in->data[plane]; |
269 | inframe.linesize[plane] = in->linesize[plane]; |
270 | } |
271 | if (direct) { |
272 | vsTransformPrepare(td, &inframe, &inframe); |
273 | } else { // separate frames |
274 | VSFrame outframe; |
275 | for (plane = 0; plane < vsTransformGetDestFrameInfo(td)->planes; plane++) { |
276 | outframe.data[plane] = out->data[plane]; |
277 | outframe.linesize[plane] = out->linesize[plane]; |
278 | } |
279 | vsTransformPrepare(td, &inframe, &outframe); |
280 | } |
281 | |
282 | vsDoTransform(td, vsGetNextTransform(td, &tc->trans)); |
283 | |
284 | vsTransformFinish(td); |
285 | |
286 | if (!direct) |
287 | av_frame_free(&in); |
288 | |
289 | return ff_filter_frame(outlink, out); |
290 | } |
291 | |
292 | static const AVFilterPad avfilter_vf_vidstabtransform_inputs[] = { |
293 | { |
294 | .name = "default", |
295 | .type = AVMEDIA_TYPE_VIDEO, |
296 | .filter_frame = filter_frame, |
297 | .config_props = config_input, |
298 | }, |
299 | { NULL } |
300 | }; |
301 | |
302 | static const AVFilterPad avfilter_vf_vidstabtransform_outputs[] = { |
303 | { |
304 | .name = "default", |
305 | .type = AVMEDIA_TYPE_VIDEO, |
306 | }, |
307 | { NULL } |
308 | }; |
309 | |
310 | AVFilter ff_vf_vidstabtransform = { |
311 | .name = "vidstabtransform", |
312 | .description = NULL_IF_CONFIG_SMALL("Transform the frames, " |
313 | "pass 2 of 2 for stabilization " |
314 | "(see vidstabdetect for pass 1)."), |
315 | .priv_size = sizeof(TransformContext), |
316 | .init = init, |
317 | .uninit = uninit, |
318 | .query_formats = query_formats, |
319 | .inputs = avfilter_vf_vidstabtransform_inputs, |
320 | .outputs = avfilter_vf_vidstabtransform_outputs, |
321 | .priv_class = &vidstabtransform_class, |
322 | }; |
323 |