summaryrefslogtreecommitdiff
path: root/libavfilter/vf_signature.c (plain)
blob: 06b1b910d4d8500ab1d11b143f62fd3f6d430481
1/*
2 * Copyright (c) 2017 Gerion Entrup
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21/**
22 * @file
23 * MPEG-7 video signature calculation and lookup filter
24 * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
25 */
26
#include <errno.h>
#include <float.h>
#include "libavcodec/put_bits.h"
#include "libavformat/avformat.h"
#include "libavutil/opt.h"
#include "libavutil/avstring.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/timestamp.h"
#include "avfilter.h"
#include "internal.h"
#include "signature.h"
#include "signature_lookup.c"
38
/* Byte offset of an option's backing field inside SignatureContext. */
#define OFFSET(x) offsetof(SignatureContext, x)
/* Option flags shared by all entries of signature_options.
 * Parenthesized so the expansion is safe next to any other operator. */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* 64-bit scale factor used as the precision factor for the block sums in
 * filter_frame() when the "divide" fallback is not active.
 * Parenthesized so the cast cannot be split from its operand. */
#define BLOCK_LCM ((int64_t) 476985600)
42
43static const AVOption signature_options[] = {
44 { "detectmode", "set the detectmode",
45 OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
46 { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" },
47 { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
48 { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
49 { "nb_inputs", "number of inputs",
50 OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
51 { "filename", "filename for output files",
52 OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
53 { "format", "set output format",
54 OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" },
55 { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
56 { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" },
57 { "th_d", "threshold to detect one word as similar",
58 OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
59 { "th_dc", "threshold to detect all words as similar",
60 OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
61 { "th_xh", "threshold to detect frames as similar",
62 OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
63 { "th_di", "minimum length of matching sequence in frames",
64 OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
65 { "th_it", "threshold for relation of good to all frames",
66 OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
67 { NULL }
68};
69
70AVFILTER_DEFINE_CLASS(signature);
71
72static int query_formats(AVFilterContext *ctx)
73{
74 /* all formats with a separate gray value */
75 static const enum AVPixelFormat pix_fmts[] = {
76 AV_PIX_FMT_GRAY8,
77 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
78 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
79 AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
80 AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
81 AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
82 AV_PIX_FMT_YUVJ440P,
83 AV_PIX_FMT_NV12, AV_PIX_FMT_NV21,
84 AV_PIX_FMT_NONE
85 };
86
87 return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
88}
89
90static int config_input(AVFilterLink *inlink)
91{
92 AVFilterContext *ctx = inlink->dst;
93 SignatureContext *sic = ctx->priv;
94 StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
95
96 sc->time_base = inlink->time_base;
97 /* test for overflow */
98 sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
99 if (sc->divide) {
100 av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
101 }
102 sc->w = inlink->w;
103 sc->h = inlink->h;
104 return 0;
105}
106
107static int get_block_size(const Block *b)
108{
109 return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
110}
111
112static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
113{
114 uint64_t sum = 0;
115
116 int x0, y0, x1, y1;
117
118 x0 = b->up.x;
119 y0 = b->up.y;
120 x1 = b->to.x;
121 y1 = b->to.y;
122
123 if (x0-1 >= 0 && y0-1 >= 0) {
124 sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
125 } else if (x0-1 >= 0) {
126 sum = intpic[y1][x1] - intpic[y1][x0-1];
127 } else if (y0-1 >= 0) {
128 sum = intpic[y1][x1] - intpic[y0-1][x1];
129 } else {
130 sum = intpic[y1][x1];
131 }
132 return sum;
133}
134
/**
 * Three-way comparison of two uint64_t values, used as qsort() callback.
 *
 * @return -1 if *a < *b, 1 if *a > *b, 0 if both are equal
 */
static int cmp(const uint64_t *a, const uint64_t *b)
{
    const uint64_t lhs = *a;
    const uint64_t rhs = *b;

    if (lhs < rhs)
        return -1;
    if (lhs > rhs)
        return 1;
    return 0;
}
139
/**
 * Set bit number pos in the bit array data.
 * Bits are counted from the most significant bit of data[0] onwards.
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t byte_idx = pos / 8;
    const unsigned shift  = 7 - (pos % 8);

    data[byte_idx] |= (uint8_t)(1 << shift);
}
148
149static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
150{
151 AVFilterContext *ctx = inlink->dst;
152 SignatureContext *sic = ctx->priv;
153 StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
154 FineSignature* fs;
155
156 static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
157 /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
158 s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
159 */
160 static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
161 static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
162
163 uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
164 uint64_t intpic[32][32];
165 uint64_t rowcount;
166 uint8_t *p = picref->data[0];
167 int inti, intj;
168 int *intjlut;
169
170 uint64_t conflist[DIFFELEM_SIZE];
171 int f = 0, g = 0, w = 0;
172 int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
173 int64_t denom;
174 int i, j, k, ternary;
175 uint64_t blocksum;
176 int blocksize;
177 int64_t th; /* threshold */
178 int64_t sum;
179
180 int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
181
182 /* initialize fs */
183 if (sc->curfinesig) {
184 fs = av_mallocz(sizeof(FineSignature));
185 if (!fs)
186 return AVERROR(ENOMEM);
187 sc->curfinesig->next = fs;
188 fs->prev = sc->curfinesig;
189 sc->curfinesig = fs;
190 } else {
191 fs = sc->curfinesig = sc->finesiglist;
192 sc->curcoarsesig1->first = fs;
193 }
194
195 fs->pts = picref->pts;
196 fs->index = sc->lastindex++;
197
198 memset(intpic, 0, sizeof(uint64_t)*32*32);
199 intjlut = av_malloc_array(inlink->w, sizeof(int));
200 if (!intjlut)
201 return AVERROR(ENOMEM);
202 for (i = 0; i < inlink->w; i++) {
203 intjlut[i] = (i*32)/inlink->w;
204 }
205
206 for (i = 0; i < inlink->h; i++) {
207 inti = (i*32)/inlink->h;
208 for (j = 0; j < inlink->w; j++) {
209 intj = intjlut[j];
210 intpic[inti][intj] += p[j];
211 }
212 p += picref->linesize[0];
213 }
214 av_freep(&intjlut);
215
216 /* The following calculates a summed area table (intpic) and brings the numbers
217 * in intpic to the same denominator.
218 * So you only have to handle the numinator in the following sections.
219 */
220 dh1 = inlink->h / 32;
221 if (inlink->h % 32)
222 dh2 = dh1 + 1;
223 dw1 = inlink->w / 32;
224 if (inlink->w % 32)
225 dw2 = dw1 + 1;
226 denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1;
227
228 for (i = 0; i < 32; i++) {
229 rowcount = 0;
230 a = 1;
231 if (dh2 > 1) {
232 a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
233 a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
234 a = (a == dh1)? dh2 : dh1;
235 }
236 for (j = 0; j < 32; j++) {
237 b = 1;
238 if (dw2 > 1) {
239 b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
240 b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
241 b = (b == dw1)? dw2 : dw1;
242 }
243 rowcount += intpic[i][j] * a * b * precfactor / denom;
244 if (i > 0) {
245 intpic[i][j] = intpic[i-1][j] + rowcount;
246 } else {
247 intpic[i][j] = rowcount;
248 }
249 }
250 }
251
252 denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2;
253
254 for (i = 0; i < ELEMENT_COUNT; i++) {
255 const ElemCat* elemcat = elements[i];
256 int64_t* elemsignature;
257 uint64_t* sortsignature;
258
259 elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
260 if (!elemsignature)
261 return AVERROR(ENOMEM);
262 sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
263 if (!sortsignature)
264 return AVERROR(ENOMEM);
265
266 for (j = 0; j < elemcat->elem_count; j++) {
267 blocksum = 0;
268 blocksize = 0;
269 for (k = 0; k < elemcat->left_count; k++) {
270 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
271 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
272 }
273 sum = blocksum / blocksize;
274 if (elemcat->av_elem) {
275 sum -= 128 * precfactor * denom;
276 } else {
277 blocksum = 0;
278 blocksize = 0;
279 for (; k < elemcat->block_count; k++) {
280 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
281 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
282 }
283 sum -= blocksum / blocksize;
284 conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
285 }
286
287 elemsignature[j] = sum;
288 sortsignature[j] = FFABS(sum);
289 }
290
291 /* get threshold */
292 qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), (void*) cmp);
293 th = sortsignature[(int) (elemcat->elem_count*0.333)];
294
295 /* ternarize */
296 for (j = 0; j < elemcat->elem_count; j++) {
297 if (elemsignature[j] < -th) {
298 ternary = 0;
299 } else if (elemsignature[j] <= th) {
300 ternary = 1;
301 } else {
302 ternary = 2;
303 }
304 fs->framesig[f/5] += ternary * pot3[f%5];
305
306 if (f == wordvec[w]) {
307 fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
308 if (w < 24)
309 w++;
310 }
311 f++;
312 }
313 av_freep(&elemsignature);
314 av_freep(&sortsignature);
315 }
316
317 /* confidence */
318 qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), (void*) cmp);
319 fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
320
321 /* coarsesignature */
322 if (sc->coarsecount == 0) {
323 if (sc->curcoarsesig2) {
324 sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));
325 if (!sc->curcoarsesig1)
326 return AVERROR(ENOMEM);
327 sc->curcoarsesig1->first = fs;
328 sc->curcoarsesig2->next = sc->curcoarsesig1;
329 sc->coarseend = sc->curcoarsesig1;
330 }
331 }
332 if (sc->coarsecount == 45) {
333 sc->midcoarse = 1;
334 sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));
335 if (!sc->curcoarsesig2)
336 return AVERROR(ENOMEM);
337 sc->curcoarsesig2->first = fs;
338 sc->curcoarsesig1->next = sc->curcoarsesig2;
339 sc->coarseend = sc->curcoarsesig2;
340 }
341 for (i = 0; i < 5; i++) {
342 set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
343 }
344 /* assuming the actual frame is the last */
345 sc->curcoarsesig1->last = fs;
346 if (sc->midcoarse) {
347 for (i = 0; i < 5; i++) {
348 set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
349 }
350 sc->curcoarsesig2->last = fs;
351 }
352
353 sc->coarsecount = (sc->coarsecount+1)%90;
354
355 /* debug printing finesignature */
356 if (av_log_get_level() == AV_LOG_DEBUG) {
357 av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
358
359 av_log(ctx, AV_LOG_DEBUG, "words:");
360 for (i = 0; i < 5; i++) {
361 av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
362 av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
363 for (j = 1; j < 5; j++)
364 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
365 av_log(ctx, AV_LOG_DEBUG, ";");
366 }
367 av_log(ctx, AV_LOG_DEBUG, "\n");
368
369 av_log(ctx, AV_LOG_DEBUG, "framesignature:");
370 for (i = 0; i < SIGELEM_SIZE/5; i++) {
371 av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
372 for (j = 1; j < 5; j++)
373 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
374 }
375 av_log(ctx, AV_LOG_DEBUG, "\n");
376 }
377
378 if (FF_INLINK_IDX(inlink) == 0)
379 return ff_filter_frame(inlink->dst->outputs[0], picref);
380 return 1;
381}
382
383static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
384{
385 FineSignature* fs;
386 CoarseSignature* cs;
387 int i, j;
388 FILE* f;
389 unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
390
391 f = fopen(filename, "w");
392 if (!f) {
393 int err = AVERROR(EINVAL);
394 char buf[128];
395 av_strerror(err, buf, sizeof(buf));
396 av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
397 return err;
398 }
399
400 /* header */
401 fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
402 fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
403 fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
404 fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
405 fprintf(f, " <VideoSignatureRegion>\n");
406 fprintf(f, " <VideoSignatureSpatialRegion>\n");
407 fprintf(f, " <Pixel>0 0 </Pixel>\n");
408 fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
409 fprintf(f, " </VideoSignatureSpatialRegion>\n");
410 fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
411 /* hoping num is 1, other values are vague */
412 fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
413 fprintf(f, " <MediaTimeOfSpatialRegion>\n");
414 fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
415 fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
416 fprintf(f, " </MediaTimeOfSpatialRegion>\n");
417
418 /* coarsesignatures */
419 for (cs = sc->coarsesiglist; cs; cs = cs->next) {
420 fprintf(f, " <VSVideoSegment>\n");
421 fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
422 fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
423 fprintf(f, " <MediaTimeOfSegment>\n");
424 fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
425 fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
426 fprintf(f, " </MediaTimeOfSegment>\n");
427 for (i = 0; i < 5; i++) {
428 fprintf(f, " <BagOfWords>");
429 for (j = 0; j < 31; j++) {
430 uint8_t n = cs->data[i][j];
431 if (j < 30) {
432 fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
433 (n & 0x40) >> 6,
434 (n & 0x20) >> 5,
435 (n & 0x10) >> 4,
436 (n & 0x08) >> 3,
437 (n & 0x04) >> 2,
438 (n & 0x02) >> 1,
439 (n & 0x01));
440 } else {
441 /* print only 3 bit in last byte */
442 fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
443 (n & 0x40) >> 6,
444 (n & 0x20) >> 5);
445 }
446 }
447 fprintf(f, "</BagOfWords>\n");
448 }
449 fprintf(f, " </VSVideoSegment>\n");
450 }
451
452 /* finesignatures */
453 for (fs = sc->finesiglist; fs; fs = fs->next) {
454 fprintf(f, " <VideoFrame>\n");
455 fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
456 /* confidence */
457 fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
458 /* words */
459 fprintf(f, " <Word>");
460 for (i = 0; i < 5; i++) {
461 fprintf(f, "%d ", fs->words[i]);
462 if (i < 4) {
463 fprintf(f, " ");
464 }
465 }
466 fprintf(f, "</Word>\n");
467 /* framesignature */
468 fprintf(f, " <FrameSignature>");
469 for (i = 0; i< SIGELEM_SIZE/5; i++) {
470 if (i > 0) {
471 fprintf(f, " ");
472 }
473 fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
474 for (j = 1; j < 5; j++)
475 fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
476 }
477 fprintf(f, "</FrameSignature>\n");
478 fprintf(f, " </VideoFrame>\n");
479 }
480 fprintf(f, " </VideoSignatureRegion>\n");
481 fprintf(f, " </Descriptor>\n");
482 fprintf(f, " </DescriptionUnit>\n");
483 fprintf(f, "</Mpeg7>\n");
484
485 fclose(f);
486 return 0;
487}
488
489static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
490{
491 FILE* f;
492 FineSignature* fs;
493 CoarseSignature* cs;
494 uint32_t numofsegments = (sc->lastindex + 44)/45;
495 int i, j;
496 PutBitContext buf;
497 /* buffer + header + coarsesignatures + finesignature */
498 int len = (512 + 6 * 32 + 3*16 + 2 +
499 numofsegments * (4*32 + 1 + 5*243) +
500 sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
501 uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
502 if (!buffer)
503 return AVERROR(ENOMEM);
504
505 f = fopen(filename, "wb");
506 if (!f) {
507 int err = AVERROR(EINVAL);
508 char buf[128];
509 av_strerror(err, buf, sizeof(buf));
510 av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
511 return err;
512 }
513 init_put_bits(&buf, buffer, len);
514
515 put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
516 put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
517 put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
518 put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
519 put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
520 put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
521 put_bits32(&buf, sc->lastindex); /* NumOfFrames */
522 /* hoping num is 1, other values are vague */
523 /* den/num might be greater than 16 bit, so cutting it */
524 put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
525 put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
526 put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
527 put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
528 put_bits32(&buf, numofsegments); /* NumOfSegments */
529 /* coarsesignatures */
530 for (cs = sc->coarsesiglist; cs; cs = cs->next) {
531 put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
532 put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
533 put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
534 put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
535 put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
536 for (i = 0; i < 5; i++) {
537 /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
538 for (j = 0; j < 30; j++) {
539 put_bits(&buf, 8, cs->data[i][j]);
540 }
541 put_bits(&buf, 3, cs->data[i][30] >> 5);
542 }
543 }
544 /* finesignatures */
545 put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
546 for (fs = sc->finesiglist; fs; fs = fs->next) {
547 put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
548 put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
549 put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
550 for (i = 0; i < 5; i++) {
551 put_bits(&buf, 8, fs->words[i]); /* Words */
552 }
553 /* framesignature */
554 for (i = 0; i < SIGELEM_SIZE/5; i++) {
555 put_bits(&buf, 8, fs->framesig[i]);
556 }
557 }
558
559 avpriv_align_put_bits(&buf);
560 flush_put_bits(&buf);
561 fwrite(buffer, 1, put_bits_count(&buf)/8, f);
562 fclose(f);
563 av_freep(&buffer);
564 return 0;
565}
566
567static int export(AVFilterContext *ctx, StreamContext *sc, int input)
568{
569 SignatureContext* sic = ctx->priv;
570 char filename[1024];
571
572 if (sic->nb_inputs > 1) {
573 /* error already handled */
574 av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
575 } else {
576 strcpy(filename, sic->filename);
577 }
578 if (sic->format == FORMAT_XML) {
579 return xml_export(ctx, sc, filename);
580 } else {
581 return binary_export(ctx, sc, filename);
582 }
583}
584
585static int request_frame(AVFilterLink *outlink)
586{
587 AVFilterContext *ctx = outlink->src;
588 SignatureContext *sic = ctx->priv;
589 StreamContext *sc, *sc2;
590 MatchingInfo match;
591 int i, j, ret;
592 int lookup = 1; /* indicates wheather EOF of all files is reached */
593
594 /* process all inputs */
595 for (i = 0; i < sic->nb_inputs; i++){
596 sc = &(sic->streamcontexts[i]);
597
598 ret = ff_request_frame(ctx->inputs[i]);
599
600 /* return if unexpected error occurs in input stream */
601 if (ret < 0 && ret != AVERROR_EOF)
602 return ret;
603
604 /* export signature at EOF */
605 if (ret == AVERROR_EOF && !sc->exported) {
606 /* export if wanted */
607 if (strlen(sic->filename) > 0) {
608 if (export(ctx, sc, i) < 0)
609 return ret;
610 }
611 sc->exported = 1;
612 }
613 lookup &= sc->exported;
614 }
615
616 /* signature lookup */
617 if (lookup && sic->mode != MODE_OFF) {
618 /* iterate over every pair */
619 for (i = 0; i < sic->nb_inputs; i++) {
620 sc = &(sic->streamcontexts[i]);
621 for (j = i+1; j < sic->nb_inputs; j++) {
622 sc2 = &(sic->streamcontexts[j]);
623 match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
624 if (match.score != 0) {
625 av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
626 i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
627 j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
628 match.matchframes);
629 if (match.whole)
630 av_log(ctx, AV_LOG_INFO, "whole video matching\n");
631 } else {
632 av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
633 }
634 }
635 }
636 }
637
638 return ret;
639}
640
641static av_cold int init(AVFilterContext *ctx)
642{
643
644 SignatureContext *sic = ctx->priv;
645 StreamContext *sc;
646 int i, ret;
647 char tmp[1024];
648
649 sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
650 if (!sic->streamcontexts)
651 return AVERROR(ENOMEM);
652
653 for (i = 0; i < sic->nb_inputs; i++) {
654 AVFilterPad pad = {
655 .type = AVMEDIA_TYPE_VIDEO,
656 .name = av_asprintf("in%d", i),
657 .config_props = config_input,
658 .filter_frame = filter_frame,
659 };
660
661 if (!pad.name)
662 return AVERROR(ENOMEM);
663
664 sc = &(sic->streamcontexts[i]);
665
666 sc->lastindex = 0;
667 sc->finesiglist = av_mallocz(sizeof(FineSignature));
668 if (!sc->finesiglist)
669 return AVERROR(ENOMEM);
670 sc->curfinesig = NULL;
671
672 sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
673 if (!sc->coarsesiglist)
674 return AVERROR(ENOMEM);
675 sc->curcoarsesig1 = sc->coarsesiglist;
676 sc->coarseend = sc->coarsesiglist;
677 sc->coarsecount = 0;
678 sc->midcoarse = 0;
679
680 if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
681 av_freep(&pad.name);
682 return ret;
683 }
684 }
685
686 /* check filename */
687 if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
688 av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
689 return AVERROR(EINVAL);
690 }
691
692 return 0;
693}
694
695
696
697static av_cold void uninit(AVFilterContext *ctx)
698{
699 SignatureContext *sic = ctx->priv;
700 StreamContext *sc;
701 void* tmp;
702 FineSignature* finsig;
703 CoarseSignature* cousig;
704 int i;
705
706
707 /* free the lists */
708 if (sic->streamcontexts != NULL) {
709 for (i = 0; i < sic->nb_inputs; i++) {
710 sc = &(sic->streamcontexts[i]);
711 finsig = sc->finesiglist;
712 cousig = sc->coarsesiglist;
713
714 while (finsig) {
715 tmp = finsig;
716 finsig = finsig->next;
717 av_freep(&tmp);
718 }
719 sc->finesiglist = NULL;
720
721 while (cousig) {
722 tmp = cousig;
723 cousig = cousig->next;
724 av_freep(&tmp);
725 }
726 sc->coarsesiglist = NULL;
727 }
728 av_freep(&sic->streamcontexts);
729 }
730}
731
732static int config_output(AVFilterLink *outlink)
733{
734 AVFilterContext *ctx = outlink->src;
735 AVFilterLink *inlink = ctx->inputs[0];
736
737 outlink->time_base = inlink->time_base;
738 outlink->frame_rate = inlink->frame_rate;
739 outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
740 outlink->w = inlink->w;
741 outlink->h = inlink->h;
742
743 return 0;
744}
745
746static const AVFilterPad signature_outputs[] = {
747 {
748 .name = "default",
749 .type = AVMEDIA_TYPE_VIDEO,
750 .request_frame = request_frame,
751 .config_props = config_output,
752 },
753 { NULL }
754};
755
756AVFilter ff_vf_signature = {
757 .name = "signature",
758 .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
759 .priv_size = sizeof(SignatureContext),
760 .priv_class = &signature_class,
761 .init = init,
762 .uninit = uninit,
763 .query_formats = query_formats,
764 .outputs = signature_outputs,
765 .inputs = NULL,
766 .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
767};
768