blob: 06b1b910d4d8500ab1d11b143f62fd3f6d430481
1 | /* |
2 | * Copyright (c) 2017 Gerion Entrup |
3 | * |
4 | * This file is part of FFmpeg. |
5 | * |
6 | * FFmpeg is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * FFmpeg is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with FFmpeg; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
19 | */ |
20 | |
21 | /** |
22 | * @file |
23 | * MPEG-7 video signature calculation and lookup filter |
24 | * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf |
25 | */ |
26 | |
#include <errno.h>
#include <float.h>
#include "libavcodec/put_bits.h"
#include "libavformat/avformat.h"
#include "libavutil/avstring.h"
#include "libavutil/frame.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/timestamp.h"
#include "avfilter.h"
#include "internal.h"
#include "signature.h"
#include "signature_lookup.c"
38 | |
/* Byte offset of member x inside SignatureContext, used by the option table. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Flags shared by every entry of the option table. Parenthesized so the
 * expansion stays a single operand next to operators that bind tighter
 * than '|'. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* 64-bit precision factor used by filter_frame() when the input is small
 * enough for exact arithmetic. Parenthesized so the cast cannot be split
 * from its operand (e.g. by sizeof). */
#define BLOCK_LCM ((int64_t) 476985600)
42 | |
43 | static const AVOption signature_options[] = { |
44 | { "detectmode", "set the detectmode", |
45 | OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" }, |
46 | { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" }, |
47 | { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" }, |
48 | { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" }, |
49 | { "nb_inputs", "number of inputs", |
50 | OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS }, |
51 | { "filename", "filename for output files", |
52 | OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS }, |
53 | { "format", "set output format", |
54 | OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" }, |
55 | { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" }, |
56 | { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" }, |
57 | { "th_d", "threshold to detect one word as similar", |
58 | OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS }, |
59 | { "th_dc", "threshold to detect all words as similar", |
60 | OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS }, |
61 | { "th_xh", "threshold to detect frames as similar", |
62 | OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS }, |
63 | { "th_di", "minimum length of matching sequence in frames", |
64 | OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS }, |
65 | { "th_it", "threshold for relation of good to all frames", |
66 | OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS }, |
67 | { NULL } |
68 | }; |
69 | |
70 | AVFILTER_DEFINE_CLASS(signature); |
71 | |
72 | static int query_formats(AVFilterContext *ctx) |
73 | { |
74 | /* all formats with a separate gray value */ |
75 | static const enum AVPixelFormat pix_fmts[] = { |
76 | AV_PIX_FMT_GRAY8, |
77 | AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, |
78 | AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, |
79 | AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, |
80 | AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, |
81 | AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, |
82 | AV_PIX_FMT_YUVJ440P, |
83 | AV_PIX_FMT_NV12, AV_PIX_FMT_NV21, |
84 | AV_PIX_FMT_NONE |
85 | }; |
86 | |
87 | return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); |
88 | } |
89 | |
90 | static int config_input(AVFilterLink *inlink) |
91 | { |
92 | AVFilterContext *ctx = inlink->dst; |
93 | SignatureContext *sic = ctx->priv; |
94 | StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]); |
95 | |
96 | sc->time_base = inlink->time_base; |
97 | /* test for overflow */ |
98 | sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255)); |
99 | if (sc->divide) { |
100 | av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n"); |
101 | } |
102 | sc->w = inlink->w; |
103 | sc->h = inlink->h; |
104 | return 0; |
105 | } |
106 | |
107 | static int get_block_size(const Block *b) |
108 | { |
109 | return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1); |
110 | } |
111 | |
112 | static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b) |
113 | { |
114 | uint64_t sum = 0; |
115 | |
116 | int x0, y0, x1, y1; |
117 | |
118 | x0 = b->up.x; |
119 | y0 = b->up.y; |
120 | x1 = b->to.x; |
121 | y1 = b->to.y; |
122 | |
123 | if (x0-1 >= 0 && y0-1 >= 0) { |
124 | sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1]; |
125 | } else if (x0-1 >= 0) { |
126 | sum = intpic[y1][x1] - intpic[y1][x0-1]; |
127 | } else if (y0-1 >= 0) { |
128 | sum = intpic[y1][x1] - intpic[y0-1][x1]; |
129 | } else { |
130 | sum = intpic[y1][x1]; |
131 | } |
132 | return sum; |
133 | } |
134 | |
/**
 * qsort() comparator ordering uint64_t values ascending.
 *
 * Takes const void pointers so that the function has exactly the type
 * qsort() expects; calling a comparator through a pointer of a different
 * function type is undefined behaviour.
 *
 * @param a pointer to the first uint64_t
 * @param b pointer to the second uint64_t
 * @return -1, 0 or 1 if *a is smaller than, equal to or greater than *b
 */
static int cmp(const void *a, const void *b)
{
    const uint64_t lhs = *(const uint64_t *) a;
    const uint64_t rhs = *(const uint64_t *) b;

    return lhs < rhs ? -1 : (lhs > rhs ? 1 : 0);
}
139 | |
/**
 * Set one bit in a byte array, counting bits from the most significant bit
 * of data[0] (position 0) onwards.
 *
 * @param data byte array to modify
 * @param pos  index of the bit to set to 1
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t   byte_index = pos >> 3;             /* pos / 8 */
    const unsigned bit_in_byte = (unsigned) (pos & 7); /* pos % 8 */

    data[byte_index] |= (uint8_t) (1u << (7u - bit_in_byte));
}
148 | |
149 | static int filter_frame(AVFilterLink *inlink, AVFrame *picref) |
150 | { |
151 | AVFilterContext *ctx = inlink->dst; |
152 | SignatureContext *sic = ctx->priv; |
153 | StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]); |
154 | FineSignature* fs; |
155 | |
156 | static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 }; |
157 | /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296 |
158 | s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10... |
159 | */ |
160 | static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354}; |
161 | static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19}; |
162 | |
163 | uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */ |
164 | uint64_t intpic[32][32]; |
165 | uint64_t rowcount; |
166 | uint8_t *p = picref->data[0]; |
167 | int inti, intj; |
168 | int *intjlut; |
169 | |
170 | uint64_t conflist[DIFFELEM_SIZE]; |
171 | int f = 0, g = 0, w = 0; |
172 | int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b; |
173 | int64_t denom; |
174 | int i, j, k, ternary; |
175 | uint64_t blocksum; |
176 | int blocksize; |
177 | int64_t th; /* threshold */ |
178 | int64_t sum; |
179 | |
180 | int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM; |
181 | |
182 | /* initialize fs */ |
183 | if (sc->curfinesig) { |
184 | fs = av_mallocz(sizeof(FineSignature)); |
185 | if (!fs) |
186 | return AVERROR(ENOMEM); |
187 | sc->curfinesig->next = fs; |
188 | fs->prev = sc->curfinesig; |
189 | sc->curfinesig = fs; |
190 | } else { |
191 | fs = sc->curfinesig = sc->finesiglist; |
192 | sc->curcoarsesig1->first = fs; |
193 | } |
194 | |
195 | fs->pts = picref->pts; |
196 | fs->index = sc->lastindex++; |
197 | |
198 | memset(intpic, 0, sizeof(uint64_t)*32*32); |
199 | intjlut = av_malloc_array(inlink->w, sizeof(int)); |
200 | if (!intjlut) |
201 | return AVERROR(ENOMEM); |
202 | for (i = 0; i < inlink->w; i++) { |
203 | intjlut[i] = (i*32)/inlink->w; |
204 | } |
205 | |
206 | for (i = 0; i < inlink->h; i++) { |
207 | inti = (i*32)/inlink->h; |
208 | for (j = 0; j < inlink->w; j++) { |
209 | intj = intjlut[j]; |
210 | intpic[inti][intj] += p[j]; |
211 | } |
212 | p += picref->linesize[0]; |
213 | } |
214 | av_freep(&intjlut); |
215 | |
216 | /* The following calculates a summed area table (intpic) and brings the numbers |
217 | * in intpic to the same denominator. |
218 | * So you only have to handle the numinator in the following sections. |
219 | */ |
220 | dh1 = inlink->h / 32; |
221 | if (inlink->h % 32) |
222 | dh2 = dh1 + 1; |
223 | dw1 = inlink->w / 32; |
224 | if (inlink->w % 32) |
225 | dw2 = dw1 + 1; |
226 | denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1; |
227 | |
228 | for (i = 0; i < 32; i++) { |
229 | rowcount = 0; |
230 | a = 1; |
231 | if (dh2 > 1) { |
232 | a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32; |
233 | a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32; |
234 | a = (a == dh1)? dh2 : dh1; |
235 | } |
236 | for (j = 0; j < 32; j++) { |
237 | b = 1; |
238 | if (dw2 > 1) { |
239 | b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32; |
240 | b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32; |
241 | b = (b == dw1)? dw2 : dw1; |
242 | } |
243 | rowcount += intpic[i][j] * a * b * precfactor / denom; |
244 | if (i > 0) { |
245 | intpic[i][j] = intpic[i-1][j] + rowcount; |
246 | } else { |
247 | intpic[i][j] = rowcount; |
248 | } |
249 | } |
250 | } |
251 | |
252 | denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2; |
253 | |
254 | for (i = 0; i < ELEMENT_COUNT; i++) { |
255 | const ElemCat* elemcat = elements[i]; |
256 | int64_t* elemsignature; |
257 | uint64_t* sortsignature; |
258 | |
259 | elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t)); |
260 | if (!elemsignature) |
261 | return AVERROR(ENOMEM); |
262 | sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t)); |
263 | if (!sortsignature) |
264 | return AVERROR(ENOMEM); |
265 | |
266 | for (j = 0; j < elemcat->elem_count; j++) { |
267 | blocksum = 0; |
268 | blocksize = 0; |
269 | for (k = 0; k < elemcat->left_count; k++) { |
270 | blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]); |
271 | blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]); |
272 | } |
273 | sum = blocksum / blocksize; |
274 | if (elemcat->av_elem) { |
275 | sum -= 128 * precfactor * denom; |
276 | } else { |
277 | blocksum = 0; |
278 | blocksize = 0; |
279 | for (; k < elemcat->block_count; k++) { |
280 | blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]); |
281 | blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]); |
282 | } |
283 | sum -= blocksum / blocksize; |
284 | conflist[g++] = FFABS(sum * 8 / (precfactor * denom)); |
285 | } |
286 | |
287 | elemsignature[j] = sum; |
288 | sortsignature[j] = FFABS(sum); |
289 | } |
290 | |
291 | /* get threshold */ |
292 | qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), (void*) cmp); |
293 | th = sortsignature[(int) (elemcat->elem_count*0.333)]; |
294 | |
295 | /* ternarize */ |
296 | for (j = 0; j < elemcat->elem_count; j++) { |
297 | if (elemsignature[j] < -th) { |
298 | ternary = 0; |
299 | } else if (elemsignature[j] <= th) { |
300 | ternary = 1; |
301 | } else { |
302 | ternary = 2; |
303 | } |
304 | fs->framesig[f/5] += ternary * pot3[f%5]; |
305 | |
306 | if (f == wordvec[w]) { |
307 | fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++]; |
308 | if (w < 24) |
309 | w++; |
310 | } |
311 | f++; |
312 | } |
313 | av_freep(&elemsignature); |
314 | av_freep(&sortsignature); |
315 | } |
316 | |
317 | /* confidence */ |
318 | qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), (void*) cmp); |
319 | fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255); |
320 | |
321 | /* coarsesignature */ |
322 | if (sc->coarsecount == 0) { |
323 | if (sc->curcoarsesig2) { |
324 | sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature)); |
325 | if (!sc->curcoarsesig1) |
326 | return AVERROR(ENOMEM); |
327 | sc->curcoarsesig1->first = fs; |
328 | sc->curcoarsesig2->next = sc->curcoarsesig1; |
329 | sc->coarseend = sc->curcoarsesig1; |
330 | } |
331 | } |
332 | if (sc->coarsecount == 45) { |
333 | sc->midcoarse = 1; |
334 | sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature)); |
335 | if (!sc->curcoarsesig2) |
336 | return AVERROR(ENOMEM); |
337 | sc->curcoarsesig2->first = fs; |
338 | sc->curcoarsesig1->next = sc->curcoarsesig2; |
339 | sc->coarseend = sc->curcoarsesig2; |
340 | } |
341 | for (i = 0; i < 5; i++) { |
342 | set_bit(sc->curcoarsesig1->data[i], fs->words[i]); |
343 | } |
344 | /* assuming the actual frame is the last */ |
345 | sc->curcoarsesig1->last = fs; |
346 | if (sc->midcoarse) { |
347 | for (i = 0; i < 5; i++) { |
348 | set_bit(sc->curcoarsesig2->data[i], fs->words[i]); |
349 | } |
350 | sc->curcoarsesig2->last = fs; |
351 | } |
352 | |
353 | sc->coarsecount = (sc->coarsecount+1)%90; |
354 | |
355 | /* debug printing finesignature */ |
356 | if (av_log_get_level() == AV_LOG_DEBUG) { |
357 | av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence); |
358 | |
359 | av_log(ctx, AV_LOG_DEBUG, "words:"); |
360 | for (i = 0; i < 5; i++) { |
361 | av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] ); |
362 | av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] ); |
363 | for (j = 1; j < 5; j++) |
364 | av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] ); |
365 | av_log(ctx, AV_LOG_DEBUG, ";"); |
366 | } |
367 | av_log(ctx, AV_LOG_DEBUG, "\n"); |
368 | |
369 | av_log(ctx, AV_LOG_DEBUG, "framesignature:"); |
370 | for (i = 0; i < SIGELEM_SIZE/5; i++) { |
371 | av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] ); |
372 | for (j = 1; j < 5; j++) |
373 | av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] ); |
374 | } |
375 | av_log(ctx, AV_LOG_DEBUG, "\n"); |
376 | } |
377 | |
378 | if (FF_INLINK_IDX(inlink) == 0) |
379 | return ff_filter_frame(inlink->dst->outputs[0], picref); |
380 | return 1; |
381 | } |
382 | |
383 | static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename) |
384 | { |
385 | FineSignature* fs; |
386 | CoarseSignature* cs; |
387 | int i, j; |
388 | FILE* f; |
389 | unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 }; |
390 | |
391 | f = fopen(filename, "w"); |
392 | if (!f) { |
393 | int err = AVERROR(EINVAL); |
394 | char buf[128]; |
395 | av_strerror(err, buf, sizeof(buf)); |
396 | av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf); |
397 | return err; |
398 | } |
399 | |
400 | /* header */ |
401 | fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n"); |
402 | fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n"); |
403 | fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n"); |
404 | fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n"); |
405 | fprintf(f, " <VideoSignatureRegion>\n"); |
406 | fprintf(f, " <VideoSignatureSpatialRegion>\n"); |
407 | fprintf(f, " <Pixel>0 0 </Pixel>\n"); |
408 | fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1); |
409 | fprintf(f, " </VideoSignatureSpatialRegion>\n"); |
410 | fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n"); |
411 | /* hoping num is 1, other values are vague */ |
412 | fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num); |
413 | fprintf(f, " <MediaTimeOfSpatialRegion>\n"); |
414 | fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n"); |
415 | fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts); |
416 | fprintf(f, " </MediaTimeOfSpatialRegion>\n"); |
417 | |
418 | /* coarsesignatures */ |
419 | for (cs = sc->coarsesiglist; cs; cs = cs->next) { |
420 | fprintf(f, " <VSVideoSegment>\n"); |
421 | fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index); |
422 | fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index); |
423 | fprintf(f, " <MediaTimeOfSegment>\n"); |
424 | fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts); |
425 | fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts); |
426 | fprintf(f, " </MediaTimeOfSegment>\n"); |
427 | for (i = 0; i < 5; i++) { |
428 | fprintf(f, " <BagOfWords>"); |
429 | for (j = 0; j < 31; j++) { |
430 | uint8_t n = cs->data[i][j]; |
431 | if (j < 30) { |
432 | fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7, |
433 | (n & 0x40) >> 6, |
434 | (n & 0x20) >> 5, |
435 | (n & 0x10) >> 4, |
436 | (n & 0x08) >> 3, |
437 | (n & 0x04) >> 2, |
438 | (n & 0x02) >> 1, |
439 | (n & 0x01)); |
440 | } else { |
441 | /* print only 3 bit in last byte */ |
442 | fprintf(f, "%d %d %d ", (n & 0x80) >> 7, |
443 | (n & 0x40) >> 6, |
444 | (n & 0x20) >> 5); |
445 | } |
446 | } |
447 | fprintf(f, "</BagOfWords>\n"); |
448 | } |
449 | fprintf(f, " </VSVideoSegment>\n"); |
450 | } |
451 | |
452 | /* finesignatures */ |
453 | for (fs = sc->finesiglist; fs; fs = fs->next) { |
454 | fprintf(f, " <VideoFrame>\n"); |
455 | fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts); |
456 | /* confidence */ |
457 | fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence); |
458 | /* words */ |
459 | fprintf(f, " <Word>"); |
460 | for (i = 0; i < 5; i++) { |
461 | fprintf(f, "%d ", fs->words[i]); |
462 | if (i < 4) { |
463 | fprintf(f, " "); |
464 | } |
465 | } |
466 | fprintf(f, "</Word>\n"); |
467 | /* framesignature */ |
468 | fprintf(f, " <FrameSignature>"); |
469 | for (i = 0; i< SIGELEM_SIZE/5; i++) { |
470 | if (i > 0) { |
471 | fprintf(f, " "); |
472 | } |
473 | fprintf(f, "%d ", fs->framesig[i] / pot3[0]); |
474 | for (j = 1; j < 5; j++) |
475 | fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] ); |
476 | } |
477 | fprintf(f, "</FrameSignature>\n"); |
478 | fprintf(f, " </VideoFrame>\n"); |
479 | } |
480 | fprintf(f, " </VideoSignatureRegion>\n"); |
481 | fprintf(f, " </Descriptor>\n"); |
482 | fprintf(f, " </DescriptionUnit>\n"); |
483 | fprintf(f, "</Mpeg7>\n"); |
484 | |
485 | fclose(f); |
486 | return 0; |
487 | } |
488 | |
489 | static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename) |
490 | { |
491 | FILE* f; |
492 | FineSignature* fs; |
493 | CoarseSignature* cs; |
494 | uint32_t numofsegments = (sc->lastindex + 44)/45; |
495 | int i, j; |
496 | PutBitContext buf; |
497 | /* buffer + header + coarsesignatures + finesignature */ |
498 | int len = (512 + 6 * 32 + 3*16 + 2 + |
499 | numofsegments * (4*32 + 1 + 5*243) + |
500 | sc->lastindex * (2 + 32 + 6*8 + 608)) / 8; |
501 | uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t)); |
502 | if (!buffer) |
503 | return AVERROR(ENOMEM); |
504 | |
505 | f = fopen(filename, "wb"); |
506 | if (!f) { |
507 | int err = AVERROR(EINVAL); |
508 | char buf[128]; |
509 | av_strerror(err, buf, sizeof(buf)); |
510 | av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf); |
511 | return err; |
512 | } |
513 | init_put_bits(&buf, buffer, len); |
514 | |
515 | put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */ |
516 | put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */ |
517 | put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */ |
518 | put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */ |
519 | put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */ |
520 | put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */ |
521 | put_bits32(&buf, sc->lastindex); /* NumOfFrames */ |
522 | /* hoping num is 1, other values are vague */ |
523 | /* den/num might be greater than 16 bit, so cutting it */ |
524 | put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */ |
525 | put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */ |
526 | put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */ |
527 | put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */ |
528 | put_bits32(&buf, numofsegments); /* NumOfSegments */ |
529 | /* coarsesignatures */ |
530 | for (cs = sc->coarsesiglist; cs; cs = cs->next) { |
531 | put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */ |
532 | put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */ |
533 | put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */ |
534 | put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */ |
535 | put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */ |
536 | for (i = 0; i < 5; i++) { |
537 | /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */ |
538 | for (j = 0; j < 30; j++) { |
539 | put_bits(&buf, 8, cs->data[i][j]); |
540 | } |
541 | put_bits(&buf, 3, cs->data[i][30] >> 5); |
542 | } |
543 | } |
544 | /* finesignatures */ |
545 | put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */ |
546 | for (fs = sc->finesiglist; fs; fs = fs->next) { |
547 | put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */ |
548 | put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */ |
549 | put_bits(&buf, 8, fs->confidence); /* FrameConfidence */ |
550 | for (i = 0; i < 5; i++) { |
551 | put_bits(&buf, 8, fs->words[i]); /* Words */ |
552 | } |
553 | /* framesignature */ |
554 | for (i = 0; i < SIGELEM_SIZE/5; i++) { |
555 | put_bits(&buf, 8, fs->framesig[i]); |
556 | } |
557 | } |
558 | |
559 | avpriv_align_put_bits(&buf); |
560 | flush_put_bits(&buf); |
561 | fwrite(buffer, 1, put_bits_count(&buf)/8, f); |
562 | fclose(f); |
563 | av_freep(&buffer); |
564 | return 0; |
565 | } |
566 | |
567 | static int export(AVFilterContext *ctx, StreamContext *sc, int input) |
568 | { |
569 | SignatureContext* sic = ctx->priv; |
570 | char filename[1024]; |
571 | |
572 | if (sic->nb_inputs > 1) { |
573 | /* error already handled */ |
574 | av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0); |
575 | } else { |
576 | strcpy(filename, sic->filename); |
577 | } |
578 | if (sic->format == FORMAT_XML) { |
579 | return xml_export(ctx, sc, filename); |
580 | } else { |
581 | return binary_export(ctx, sc, filename); |
582 | } |
583 | } |
584 | |
585 | static int request_frame(AVFilterLink *outlink) |
586 | { |
587 | AVFilterContext *ctx = outlink->src; |
588 | SignatureContext *sic = ctx->priv; |
589 | StreamContext *sc, *sc2; |
590 | MatchingInfo match; |
591 | int i, j, ret; |
592 | int lookup = 1; /* indicates wheather EOF of all files is reached */ |
593 | |
594 | /* process all inputs */ |
595 | for (i = 0; i < sic->nb_inputs; i++){ |
596 | sc = &(sic->streamcontexts[i]); |
597 | |
598 | ret = ff_request_frame(ctx->inputs[i]); |
599 | |
600 | /* return if unexpected error occurs in input stream */ |
601 | if (ret < 0 && ret != AVERROR_EOF) |
602 | return ret; |
603 | |
604 | /* export signature at EOF */ |
605 | if (ret == AVERROR_EOF && !sc->exported) { |
606 | /* export if wanted */ |
607 | if (strlen(sic->filename) > 0) { |
608 | if (export(ctx, sc, i) < 0) |
609 | return ret; |
610 | } |
611 | sc->exported = 1; |
612 | } |
613 | lookup &= sc->exported; |
614 | } |
615 | |
616 | /* signature lookup */ |
617 | if (lookup && sic->mode != MODE_OFF) { |
618 | /* iterate over every pair */ |
619 | for (i = 0; i < sic->nb_inputs; i++) { |
620 | sc = &(sic->streamcontexts[i]); |
621 | for (j = i+1; j < sic->nb_inputs; j++) { |
622 | sc2 = &(sic->streamcontexts[j]); |
623 | match = lookup_signatures(ctx, sic, sc, sc2, sic->mode); |
624 | if (match.score != 0) { |
625 | av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n", |
626 | i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den, |
627 | j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den, |
628 | match.matchframes); |
629 | if (match.whole) |
630 | av_log(ctx, AV_LOG_INFO, "whole video matching\n"); |
631 | } else { |
632 | av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j); |
633 | } |
634 | } |
635 | } |
636 | } |
637 | |
638 | return ret; |
639 | } |
640 | |
641 | static av_cold int init(AVFilterContext *ctx) |
642 | { |
643 | |
644 | SignatureContext *sic = ctx->priv; |
645 | StreamContext *sc; |
646 | int i, ret; |
647 | char tmp[1024]; |
648 | |
649 | sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext)); |
650 | if (!sic->streamcontexts) |
651 | return AVERROR(ENOMEM); |
652 | |
653 | for (i = 0; i < sic->nb_inputs; i++) { |
654 | AVFilterPad pad = { |
655 | .type = AVMEDIA_TYPE_VIDEO, |
656 | .name = av_asprintf("in%d", i), |
657 | .config_props = config_input, |
658 | .filter_frame = filter_frame, |
659 | }; |
660 | |
661 | if (!pad.name) |
662 | return AVERROR(ENOMEM); |
663 | |
664 | sc = &(sic->streamcontexts[i]); |
665 | |
666 | sc->lastindex = 0; |
667 | sc->finesiglist = av_mallocz(sizeof(FineSignature)); |
668 | if (!sc->finesiglist) |
669 | return AVERROR(ENOMEM); |
670 | sc->curfinesig = NULL; |
671 | |
672 | sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature)); |
673 | if (!sc->coarsesiglist) |
674 | return AVERROR(ENOMEM); |
675 | sc->curcoarsesig1 = sc->coarsesiglist; |
676 | sc->coarseend = sc->coarsesiglist; |
677 | sc->coarsecount = 0; |
678 | sc->midcoarse = 0; |
679 | |
680 | if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) { |
681 | av_freep(&pad.name); |
682 | return ret; |
683 | } |
684 | } |
685 | |
686 | /* check filename */ |
687 | if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) { |
688 | av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n"); |
689 | return AVERROR(EINVAL); |
690 | } |
691 | |
692 | return 0; |
693 | } |
694 | |
695 | |
696 | |
697 | static av_cold void uninit(AVFilterContext *ctx) |
698 | { |
699 | SignatureContext *sic = ctx->priv; |
700 | StreamContext *sc; |
701 | void* tmp; |
702 | FineSignature* finsig; |
703 | CoarseSignature* cousig; |
704 | int i; |
705 | |
706 | |
707 | /* free the lists */ |
708 | if (sic->streamcontexts != NULL) { |
709 | for (i = 0; i < sic->nb_inputs; i++) { |
710 | sc = &(sic->streamcontexts[i]); |
711 | finsig = sc->finesiglist; |
712 | cousig = sc->coarsesiglist; |
713 | |
714 | while (finsig) { |
715 | tmp = finsig; |
716 | finsig = finsig->next; |
717 | av_freep(&tmp); |
718 | } |
719 | sc->finesiglist = NULL; |
720 | |
721 | while (cousig) { |
722 | tmp = cousig; |
723 | cousig = cousig->next; |
724 | av_freep(&tmp); |
725 | } |
726 | sc->coarsesiglist = NULL; |
727 | } |
728 | av_freep(&sic->streamcontexts); |
729 | } |
730 | } |
731 | |
732 | static int config_output(AVFilterLink *outlink) |
733 | { |
734 | AVFilterContext *ctx = outlink->src; |
735 | AVFilterLink *inlink = ctx->inputs[0]; |
736 | |
737 | outlink->time_base = inlink->time_base; |
738 | outlink->frame_rate = inlink->frame_rate; |
739 | outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; |
740 | outlink->w = inlink->w; |
741 | outlink->h = inlink->h; |
742 | |
743 | return 0; |
744 | } |
745 | |
746 | static const AVFilterPad signature_outputs[] = { |
747 | { |
748 | .name = "default", |
749 | .type = AVMEDIA_TYPE_VIDEO, |
750 | .request_frame = request_frame, |
751 | .config_props = config_output, |
752 | }, |
753 | { NULL } |
754 | }; |
755 | |
756 | AVFilter ff_vf_signature = { |
757 | .name = "signature", |
758 | .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"), |
759 | .priv_size = sizeof(SignatureContext), |
760 | .priv_class = &signature_class, |
761 | .init = init, |
762 | .uninit = uninit, |
763 | .query_formats = query_formats, |
764 | .outputs = signature_outputs, |
765 | .inputs = NULL, |
766 | .flags = AVFILTER_FLAG_DYNAMIC_INPUTS, |
767 | }; |
768 |