blob: d7e6579af7155ccc436414d7328caac07f14ce15
1 | /* |
2 | * Copyright (c) 2014-2015 Michael Niedermayer <michaelni@gmx.at> |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
19 | */ |
20 | |
21 | /** |
22 | * @todo switch to dualinput |
23 | */ |
24 | |
25 | #include "libavutil/avassert.h" |
26 | #include "libavutil/imgutils.h" |
27 | #include "libavutil/opt.h" |
28 | #include "internal.h" |
29 | |
30 | #include "lavfutils.h" |
31 | |
32 | #define MAX_MIPMAPS 5 |
33 | |
34 | typedef struct FOCContext { |
35 | AVClass *class; |
36 | float threshold; |
37 | int mipmaps; |
38 | int xmin, ymin, xmax, ymax; |
39 | char *obj_filename; |
40 | int last_x, last_y; |
41 | AVFrame *obj_frame; |
42 | AVFrame *needle_frame[MAX_MIPMAPS]; |
43 | AVFrame *haystack_frame[MAX_MIPMAPS]; |
44 | } FOCContext; |
45 | |
46 | #define OFFSET(x) offsetof(FOCContext, x) |
47 | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
48 | static const AVOption find_rect_options[] = { |
49 | { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS }, |
50 | { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS }, |
51 | { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS }, |
52 | { "xmin", "", OFFSET(xmin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
53 | { "ymin", "", OFFSET(ymin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
54 | { "xmax", "", OFFSET(xmax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
55 | { "ymax", "", OFFSET(ymax), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
56 | { NULL } |
57 | }; |
58 | |
59 | AVFILTER_DEFINE_CLASS(find_rect); |
60 | |
61 | static int query_formats(AVFilterContext *ctx) |
62 | { |
63 | static const enum AVPixelFormat pix_fmts[] = { |
64 | AV_PIX_FMT_YUV420P, |
65 | AV_PIX_FMT_YUVJ420P, |
66 | AV_PIX_FMT_NONE |
67 | }; |
68 | |
69 | return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); |
70 | } |
71 | |
72 | static AVFrame *downscale(AVFrame *in) |
73 | { |
74 | int x, y; |
75 | AVFrame *frame = av_frame_alloc(); |
76 | uint8_t *src, *dst; |
77 | if (!frame) |
78 | return NULL; |
79 | |
80 | frame->format = in->format; |
81 | frame->width = (in->width + 1) / 2; |
82 | frame->height = (in->height+ 1) / 2; |
83 | |
84 | if (av_frame_get_buffer(frame, 32) < 0) { |
85 | av_frame_free(&frame); |
86 | return NULL; |
87 | } |
88 | src = in ->data[0]; |
89 | dst = frame->data[0]; |
90 | |
91 | for(y = 0; y < frame->height; y++) { |
92 | for(x = 0; x < frame->width; x++) { |
93 | dst[x] = ( src[2*x+0] |
94 | + src[2*x+1] |
95 | + src[2*x+0 + in->linesize[0]] |
96 | + src[2*x+1 + in->linesize[0]] |
97 | + 2) >> 2; |
98 | } |
99 | src += 2*in->linesize[0]; |
100 | dst += frame->linesize[0]; |
101 | } |
102 | return frame; |
103 | } |
104 | |
105 | static float compare(const AVFrame *haystack, const AVFrame *obj, int offx, int offy) |
106 | { |
107 | int x,y; |
108 | int o_sum_v = 0; |
109 | int h_sum_v = 0; |
110 | int64_t oo_sum_v = 0; |
111 | int64_t hh_sum_v = 0; |
112 | int64_t oh_sum_v = 0; |
113 | float c; |
114 | int n = obj->height * obj->width; |
115 | const uint8_t *odat = obj ->data[0]; |
116 | const uint8_t *hdat = haystack->data[0] + offx + offy * haystack->linesize[0]; |
117 | int64_t o_sigma, h_sigma; |
118 | |
119 | for(y = 0; y < obj->height; y++) { |
120 | for(x = 0; x < obj->width; x++) { |
121 | int o_v = odat[x]; |
122 | int h_v = hdat[x]; |
123 | o_sum_v += o_v; |
124 | h_sum_v += h_v; |
125 | oo_sum_v += o_v * o_v; |
126 | hh_sum_v += h_v * h_v; |
127 | oh_sum_v += o_v * h_v; |
128 | } |
129 | odat += obj->linesize[0]; |
130 | hdat += haystack->linesize[0]; |
131 | } |
132 | o_sigma = n*oo_sum_v - o_sum_v*(int64_t)o_sum_v; |
133 | h_sigma = n*hh_sum_v - h_sum_v*(int64_t)h_sum_v; |
134 | |
135 | if (o_sigma == 0 || h_sigma == 0) |
136 | return 1.0; |
137 | |
138 | c = (n*oh_sum_v - o_sum_v*(int64_t)h_sum_v) / (sqrt(o_sigma)*sqrt(h_sigma)); |
139 | |
140 | return 1 - fabs(c); |
141 | } |
142 | |
143 | static int config_input(AVFilterLink *inlink) |
144 | { |
145 | AVFilterContext *ctx = inlink->dst; |
146 | FOCContext *foc = ctx->priv; |
147 | |
148 | if (foc->xmax <= 0) |
149 | foc->xmax = inlink->w - foc->obj_frame->width; |
150 | if (foc->ymax <= 0) |
151 | foc->ymax = inlink->h - foc->obj_frame->height; |
152 | |
153 | return 0; |
154 | } |
155 | |
156 | static float search(FOCContext *foc, int pass, int maxpass, int xmin, int xmax, int ymin, int ymax, int *best_x, int *best_y, float best_score) |
157 | { |
158 | int x, y; |
159 | |
160 | if (pass + 1 <= maxpass) { |
161 | int sub_x, sub_y; |
162 | search(foc, pass+1, maxpass, xmin>>1, (xmax+1)>>1, ymin>>1, (ymax+1)>>1, &sub_x, &sub_y, 1.0); |
163 | xmin = FFMAX(xmin, 2*sub_x - 4); |
164 | xmax = FFMIN(xmax, 2*sub_x + 4); |
165 | ymin = FFMAX(ymin, 2*sub_y - 4); |
166 | ymax = FFMIN(ymax, 2*sub_y + 4); |
167 | } |
168 | |
169 | for (y = ymin; y <= ymax; y++) { |
170 | for (x = xmin; x <= xmax; x++) { |
171 | float score = compare(foc->haystack_frame[pass], foc->needle_frame[pass], x, y); |
172 | av_assert0(score != 0); |
173 | if (score < best_score) { |
174 | best_score = score; |
175 | *best_x = x; |
176 | *best_y = y; |
177 | } |
178 | } |
179 | } |
180 | return best_score; |
181 | } |
182 | |
183 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
184 | { |
185 | AVFilterContext *ctx = inlink->dst; |
186 | FOCContext *foc = ctx->priv; |
187 | float best_score; |
188 | int best_x, best_y; |
189 | int i; |
190 | |
191 | foc->haystack_frame[0] = av_frame_clone(in); |
192 | for (i=1; i<foc->mipmaps; i++) { |
193 | foc->haystack_frame[i] = downscale(foc->haystack_frame[i-1]); |
194 | } |
195 | |
196 | best_score = search(foc, 0, 0, |
197 | FFMAX(foc->xmin, foc->last_x - 8), |
198 | FFMIN(foc->xmax, foc->last_x + 8), |
199 | FFMAX(foc->ymin, foc->last_y - 8), |
200 | FFMIN(foc->ymax, foc->last_y + 8), |
201 | &best_x, &best_y, 1.0); |
202 | |
203 | best_score = search(foc, 0, foc->mipmaps - 1, foc->xmin, foc->xmax, foc->ymin, foc->ymax, |
204 | &best_x, &best_y, best_score); |
205 | |
206 | for (i=0; i<MAX_MIPMAPS; i++) { |
207 | av_frame_free(&foc->haystack_frame[i]); |
208 | } |
209 | |
210 | if (best_score > foc->threshold) { |
211 | return ff_filter_frame(ctx->outputs[0], in); |
212 | } |
213 | |
214 | av_log(ctx, AV_LOG_DEBUG, "Found at %d %d score %f\n", best_x, best_y, best_score); |
215 | foc->last_x = best_x; |
216 | foc->last_y = best_y; |
217 | |
218 | av_frame_make_writable(in); |
219 | |
220 | av_dict_set_int(&in->metadata, "lavfi.rect.w", foc->obj_frame->width, 0); |
221 | av_dict_set_int(&in->metadata, "lavfi.rect.h", foc->obj_frame->height, 0); |
222 | av_dict_set_int(&in->metadata, "lavfi.rect.x", best_x, 0); |
223 | av_dict_set_int(&in->metadata, "lavfi.rect.y", best_y, 0); |
224 | |
225 | return ff_filter_frame(ctx->outputs[0], in); |
226 | } |
227 | |
228 | static av_cold void uninit(AVFilterContext *ctx) |
229 | { |
230 | FOCContext *foc = ctx->priv; |
231 | int i; |
232 | |
233 | for (i = 0; i < MAX_MIPMAPS; i++) { |
234 | av_frame_free(&foc->needle_frame[i]); |
235 | av_frame_free(&foc->haystack_frame[i]); |
236 | } |
237 | |
238 | if (foc->obj_frame) |
239 | av_freep(&foc->obj_frame->data[0]); |
240 | av_frame_free(&foc->obj_frame); |
241 | } |
242 | |
243 | static av_cold int init(AVFilterContext *ctx) |
244 | { |
245 | FOCContext *foc = ctx->priv; |
246 | int ret, i; |
247 | |
248 | if (!foc->obj_filename) { |
249 | av_log(ctx, AV_LOG_ERROR, "object filename not set\n"); |
250 | return AVERROR(EINVAL); |
251 | } |
252 | |
253 | foc->obj_frame = av_frame_alloc(); |
254 | if (!foc->obj_frame) |
255 | return AVERROR(ENOMEM); |
256 | |
257 | if ((ret = ff_load_image(foc->obj_frame->data, foc->obj_frame->linesize, |
258 | &foc->obj_frame->width, &foc->obj_frame->height, |
259 | &foc->obj_frame->format, foc->obj_filename, ctx)) < 0) |
260 | return ret; |
261 | |
262 | if (foc->obj_frame->format != AV_PIX_FMT_GRAY8) { |
263 | av_log(ctx, AV_LOG_ERROR, "object image is not a grayscale image\n"); |
264 | return AVERROR(EINVAL); |
265 | } |
266 | |
267 | foc->needle_frame[0] = av_frame_clone(foc->obj_frame); |
268 | for (i = 1; i < foc->mipmaps; i++) { |
269 | foc->needle_frame[i] = downscale(foc->needle_frame[i-1]); |
270 | if (!foc->needle_frame[i]) |
271 | return AVERROR(ENOMEM); |
272 | } |
273 | |
274 | return 0; |
275 | } |
276 | |
277 | static const AVFilterPad foc_inputs[] = { |
278 | { |
279 | .name = "default", |
280 | .type = AVMEDIA_TYPE_VIDEO, |
281 | .config_props = config_input, |
282 | .filter_frame = filter_frame, |
283 | }, |
284 | { NULL } |
285 | }; |
286 | |
287 | static const AVFilterPad foc_outputs[] = { |
288 | { |
289 | .name = "default", |
290 | .type = AVMEDIA_TYPE_VIDEO, |
291 | }, |
292 | { NULL } |
293 | }; |
294 | |
295 | AVFilter ff_vf_find_rect = { |
296 | .name = "find_rect", |
297 | .description = NULL_IF_CONFIG_SMALL("Find a user specified object."), |
298 | .priv_size = sizeof(FOCContext), |
299 | .init = init, |
300 | .uninit = uninit, |
301 | .query_formats = query_formats, |
302 | .inputs = foc_inputs, |
303 | .outputs = foc_outputs, |
304 | .priv_class = &find_rect_class, |
305 | }; |
306 |