signature_lookup.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. /*
  2. * Copyright (c) 2017 Gerion Entrup
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19. */
  20. /**
  21. * @file
  22. * MPEG-7 video signature calculation and lookup filter
  23. */
  24. #include "signature.h"
  25. #define HOUGH_MAX_OFFSET 90
  26. #define MAX_FRAMERATE 60
  27. #define DIR_PREV 0
  28. #define DIR_NEXT 1
  29. #define DIR_PREV_END 2
  30. #define DIR_NEXT_END 3
  31. #define STATUS_NULL 0
  32. #define STATUS_END_REACHED 1
  33. #define STATUS_BEGIN_REACHED 2
  34. static void fill_l1distlut(uint8_t lut[])
  35. {
  36. int i, j, tmp_i, tmp_j,count;
  37. uint8_t dist;
  38. for (i = 0, count = 0; i < 242; i++) {
  39. for (j = i + 1; j < 243; j++, count++) {
  40. /* ternary distance between i and j */
  41. dist = 0;
  42. tmp_i = i; tmp_j = j;
  43. do {
  44. dist += FFABS((tmp_j % 3) - (tmp_i % 3));
  45. tmp_j /= 3;
  46. tmp_i /= 3;
  47. } while (tmp_i > 0 || tmp_j > 0);
  48. lut[count] = dist;
  49. }
  50. }
  51. }
  52. static unsigned int intersection_word(const uint8_t *first, const uint8_t *second)
  53. {
  54. unsigned int val=0,i;
  55. for (i = 0; i < 28; i += 4) {
  56. val += av_popcount( (first[i] & second[i] ) << 24 |
  57. (first[i+1] & second[i+1]) << 16 |
  58. (first[i+2] & second[i+2]) << 8 |
  59. (first[i+3] & second[i+3]) );
  60. }
  61. val += av_popcount( (first[28] & second[28]) << 16 |
  62. (first[29] & second[29]) << 8 |
  63. (first[30] & second[30]) );
  64. return val;
  65. }
  66. static unsigned int union_word(const uint8_t *first, const uint8_t *second)
  67. {
  68. unsigned int val=0,i;
  69. for (i = 0; i < 28; i += 4) {
  70. val += av_popcount( (first[i] | second[i] ) << 24 |
  71. (first[i+1] | second[i+1]) << 16 |
  72. (first[i+2] | second[i+2]) << 8 |
  73. (first[i+3] | second[i+3]) );
  74. }
  75. val += av_popcount( (first[28] | second[28]) << 16 |
  76. (first[29] | second[29]) << 8 |
  77. (first[30] | second[30]) );
  78. return val;
  79. }
  80. static unsigned int get_l1dist(AVFilterContext *ctx, SignatureContext *sc, const uint8_t *first, const uint8_t *second)
  81. {
  82. unsigned int i;
  83. unsigned int dist = 0;
  84. uint8_t f, s;
  85. for (i = 0; i < SIGELEM_SIZE/5; i++) {
  86. if (first[i] != second[i]) {
  87. f = first[i];
  88. s = second[i];
  89. if (f > s) {
  90. /* little variation of gauss sum formula */
  91. dist += sc->l1distlut[243*242/2 - (243-s)*(242-s)/2 + f - s - 1];
  92. } else {
  93. dist += sc->l1distlut[243*242/2 - (243-f)*(242-f)/2 + s - f - 1];
  94. }
  95. }
  96. }
  97. return dist;
  98. }
  99. /**
  100. * calculates the jaccard distance and evaluates a pair of coarse signatures as good
  101. * @return 0 if pair is bad, 1 otherwise
  102. */
  103. static int get_jaccarddist(SignatureContext *sc, CoarseSignature *first, CoarseSignature *second)
  104. {
  105. int jaccarddist, i, composdist = 0, cwthcount = 0;
  106. for (i = 0; i < 5; i++) {
  107. if ((jaccarddist = intersection_word(first->data[i], second->data[i])) > 0) {
  108. jaccarddist /= union_word(first->data[i], second->data[i]);
  109. }
  110. if (jaccarddist >= sc->thworddist) {
  111. if (++cwthcount > 2) {
  112. /* more than half (5/2) of distances are too wide */
  113. return 0;
  114. }
  115. }
  116. composdist += jaccarddist;
  117. if (composdist > sc->thcomposdist) {
  118. return 0;
  119. }
  120. }
  121. return 1;
  122. }
  123. /**
  124. * step through the coarsesignatures as long as a good candidate is found
  125. * @return 0 if no candidate is found, 1 otherwise
  126. */
  127. static int find_next_coarsecandidate(SignatureContext *sc, CoarseSignature *secondstart, CoarseSignature **first, CoarseSignature **second, int start)
  128. {
  129. /* go one coarsesignature foreword */
  130. if (!start) {
  131. if ((*second)->next) {
  132. *second = (*second)->next;
  133. } else if ((*first)->next) {
  134. *second = secondstart;
  135. *first = (*first)->next;
  136. } else {
  137. return 0;
  138. }
  139. }
  140. while (1) {
  141. if (get_jaccarddist(sc, *first, *second))
  142. return 1;
  143. /* next signature */
  144. if ((*second)->next) {
  145. *second = (*second)->next;
  146. } else if ((*first)->next) {
  147. *second = secondstart;
  148. *first = (*first)->next;
  149. } else {
  150. return 0;
  151. }
  152. }
  153. }
  154. /**
  155. * compares framesignatures and sorts out signatures with a l1 distance above a given threshold.
  156. * Then tries to find out offset and differences between framerates with a hough transformation
  157. */
  158. static MatchingInfo* get_matching_parameters(AVFilterContext *ctx, SignatureContext *sc, FineSignature *first, FineSignature *second)
  159. {
  160. FineSignature *f, *s;
  161. size_t i, j, k, l, hmax = 0, score;
  162. int framerate, offset, l1dist;
  163. double m;
  164. MatchingInfo *cands = NULL, *c = NULL;
  165. struct {
  166. uint8_t size;
  167. unsigned int dist;
  168. FineSignature *a;
  169. uint8_t b_pos[COARSE_SIZE];
  170. FineSignature *b[COARSE_SIZE];
  171. } pairs[COARSE_SIZE];
  172. typedef struct hspace_elem {
  173. int dist;
  174. size_t score;
  175. FineSignature *a;
  176. FineSignature *b;
  177. } hspace_elem;
  178. /* houghspace */
  179. hspace_elem** hspace = av_malloc_array(MAX_FRAMERATE, sizeof(hspace_elem *));
  180. /* initialize houghspace */
  181. for (i = 0; i < MAX_FRAMERATE; i++) {
  182. hspace[i] = av_malloc_array(2 * HOUGH_MAX_OFFSET + 1, sizeof(hspace_elem));
  183. for (j = 0; j < HOUGH_MAX_OFFSET; j++) {
  184. hspace[i][j].score = 0;
  185. hspace[i][j].dist = 99999;
  186. }
  187. }
  188. /* l1 distances */
  189. for (i = 0, f = first; i < COARSE_SIZE && f->next; i++, f = f->next) {
  190. pairs[i].size = 0;
  191. pairs[i].dist = 99999;
  192. pairs[i].a = f;
  193. for (j = 0, s = second; j < COARSE_SIZE && s->next; j++, s = s->next) {
  194. /* l1 distance of finesignature */
  195. l1dist = get_l1dist(ctx, sc, f->framesig, s->framesig);
  196. if (l1dist < sc->thl1) {
  197. if (l1dist < pairs[i].dist) {
  198. pairs[i].size = 1;
  199. pairs[i].dist = l1dist;
  200. pairs[i].b_pos[0] = j;
  201. pairs[i].b[0] = s;
  202. } else if (l1dist == pairs[i].dist) {
  203. pairs[i].b[pairs[i].size] = s;
  204. pairs[i].b_pos[pairs[i].size] = j;
  205. pairs[i].size++;
  206. }
  207. }
  208. }
  209. }
  210. /* last incomplete coarsesignature */
  211. if (f->next == NULL) {
  212. for (; i < COARSE_SIZE; i++) {
  213. pairs[i].size = 0;
  214. pairs[i].dist = 99999;
  215. }
  216. }
  217. /* hough transformation */
  218. for (i = 0; i < COARSE_SIZE; i++) {
  219. for (j = 0; j < pairs[i].size; j++) {
  220. for (k = i + 1; k < COARSE_SIZE; k++) {
  221. for (l = 0; l < pairs[k].size; l++) {
  222. if (pairs[i].b[j] != pairs[k].b[l]) {
  223. /* linear regression */
  224. m = (pairs[k].b_pos[l]-pairs[i].b_pos[j]) / (k-i); /* good value between 0.0 - 2.0 */
  225. framerate = (int) m*30 + 0.5; /* round up to 0 - 60 */
  226. if (framerate>0 && framerate <= MAX_FRAMERATE) {
  227. offset = pairs[i].b_pos[j] - ((int) m*i + 0.5); /* only second part has to be rounded up */
  228. if (offset > -HOUGH_MAX_OFFSET && offset < HOUGH_MAX_OFFSET) {
  229. if (pairs[i].dist < pairs[k].dist) {
  230. if (pairs[i].dist < hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist) {
  231. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist = pairs[i].dist;
  232. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].a = pairs[i].a;
  233. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].b = pairs[i].b[j];
  234. }
  235. } else {
  236. if (pairs[k].dist < hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist) {
  237. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist = pairs[k].dist;
  238. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].a = pairs[k].a;
  239. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].b = pairs[k].b[l];
  240. }
  241. }
  242. score = hspace[framerate-1][offset+HOUGH_MAX_OFFSET].score + 1;
  243. if (score > hmax )
  244. hmax = score;
  245. hspace[framerate-1][offset+HOUGH_MAX_OFFSET].score = score;
  246. }
  247. }
  248. }
  249. }
  250. }
  251. }
  252. }
  253. if (hmax > 0) {
  254. hmax = (int) (0.7*hmax);
  255. for (i = 0; i < MAX_FRAMERATE; i++) {
  256. for (j = 0; j < HOUGH_MAX_OFFSET; j++) {
  257. if (hmax < hspace[i][j].score) {
  258. if (c == NULL) {
  259. c = av_malloc(sizeof(MatchingInfo));
  260. if (!c)
  261. av_log(ctx, AV_LOG_FATAL, "Could not allocate memory");
  262. cands = c;
  263. } else {
  264. c->next = av_malloc(sizeof(MatchingInfo));
  265. if (!c->next)
  266. av_log(ctx, AV_LOG_FATAL, "Could not allocate memory");
  267. c = c->next;
  268. }
  269. c->framerateratio = (i+1.0) / 30;
  270. c->score = hspace[i][j].score;
  271. c->offset = j-90;
  272. c->first = hspace[i][j].a;
  273. c->second = hspace[i][j].b;
  274. c->next = NULL;
  275. /* not used */
  276. c->meandist = 0;
  277. c->matchframes = 0;
  278. c->whole = 0;
  279. }
  280. }
  281. }
  282. }
  283. for (i = 0; i < MAX_FRAMERATE; i++) {
  284. av_freep(&hspace[i]);
  285. }
  286. av_freep(&hspace);
  287. return cands;
  288. }
  289. static int iterate_frame(double frr, FineSignature **a, FineSignature **b, int fcount, int *bcount, int dir)
  290. {
  291. int step;
  292. /* between 1 and 2, because frr is between 1 and 2 */
  293. step = ((int) 0.5 + fcount * frr) /* current frame */
  294. -((int) 0.5 + (fcount-1) * frr);/* last frame */
  295. if (dir == DIR_NEXT) {
  296. if (frr >= 1.0) {
  297. if ((*a)->next) {
  298. *a = (*a)->next;
  299. } else {
  300. return DIR_NEXT_END;
  301. }
  302. if (step == 1) {
  303. if ((*b)->next) {
  304. *b = (*b)->next;
  305. (*bcount)++;
  306. } else {
  307. return DIR_NEXT_END;
  308. }
  309. } else {
  310. if ((*b)->next && (*b)->next->next) {
  311. *b = (*b)->next->next;
  312. (*bcount)++;
  313. } else {
  314. return DIR_NEXT_END;
  315. }
  316. }
  317. } else {
  318. if ((*b)->next) {
  319. *b = (*b)->next;
  320. (*bcount)++;
  321. } else {
  322. return DIR_NEXT_END;
  323. }
  324. if (step == 1) {
  325. if ((*a)->next) {
  326. *a = (*a)->next;
  327. } else {
  328. return DIR_NEXT_END;
  329. }
  330. } else {
  331. if ((*a)->next && (*a)->next->next) {
  332. *a = (*a)->next->next;
  333. } else {
  334. return DIR_NEXT_END;
  335. }
  336. }
  337. }
  338. return DIR_NEXT;
  339. } else {
  340. if (frr >= 1.0) {
  341. if ((*a)->prev) {
  342. *a = (*a)->prev;
  343. } else {
  344. return DIR_PREV_END;
  345. }
  346. if (step == 1) {
  347. if ((*b)->prev) {
  348. *b = (*b)->prev;
  349. (*bcount)++;
  350. } else {
  351. return DIR_PREV_END;
  352. }
  353. } else {
  354. if ((*b)->prev && (*b)->prev->prev) {
  355. *b = (*b)->prev->prev;
  356. (*bcount)++;
  357. } else {
  358. return DIR_PREV_END;
  359. }
  360. }
  361. } else {
  362. if ((*b)->prev) {
  363. *b = (*b)->prev;
  364. (*bcount)++;
  365. } else {
  366. return DIR_PREV_END;
  367. }
  368. if (step == 1) {
  369. if ((*a)->prev) {
  370. *a = (*a)->prev;
  371. } else {
  372. return DIR_PREV_END;
  373. }
  374. } else {
  375. if ((*a)->prev && (*a)->prev->prev) {
  376. *a = (*a)->prev->prev;
  377. } else {
  378. return DIR_PREV_END;
  379. }
  380. }
  381. }
  382. return DIR_PREV;
  383. }
  384. }
  385. static MatchingInfo evaluate_parameters(AVFilterContext *ctx, SignatureContext *sc, MatchingInfo *infos, MatchingInfo bestmatch, int mode)
  386. {
  387. int dist, distsum = 0, bcount = 1, dir = DIR_NEXT;
  388. int fcount = 0, goodfcount = 0, gooda = 0, goodb = 0;
  389. double meandist, minmeandist = bestmatch.meandist;
  390. int tolerancecount = 0;
  391. FineSignature *a, *b, *aprev, *bprev;
  392. int status = STATUS_NULL;
  393. for (; infos != NULL; infos = infos->next) {
  394. a = infos->first;
  395. b = infos->second;
  396. while (1) {
  397. dist = get_l1dist(ctx, sc, a->framesig, b->framesig);
  398. if (dist > sc->thl1) {
  399. if (a->confidence >= 1 || b->confidence >= 1) {
  400. /* bad frame (because high different information) */
  401. tolerancecount++;
  402. }
  403. if (tolerancecount > 2) {
  404. a = aprev;
  405. b = bprev;
  406. if (dir == DIR_NEXT) {
  407. /* turn around */
  408. a = infos->first;
  409. b = infos->second;
  410. dir = DIR_PREV;
  411. } else {
  412. break;
  413. }
  414. }
  415. } else {
  416. /* good frame */
  417. distsum += dist;
  418. goodfcount++;
  419. tolerancecount=0;
  420. aprev = a;
  421. bprev = b;
  422. if (a->confidence < 1) gooda++;
  423. if (b->confidence < 1) goodb++;
  424. }
  425. fcount++;
  426. dir = iterate_frame(infos->framerateratio, &a, &b, fcount, &bcount, dir);
  427. if (dir == DIR_NEXT_END) {
  428. status = STATUS_END_REACHED;
  429. a = infos->first;
  430. b = infos->second;
  431. dir = iterate_frame(infos->framerateratio, &a, &b, fcount, &bcount, DIR_PREV);
  432. }
  433. if (dir == DIR_PREV_END) {
  434. status |= STATUS_BEGIN_REACHED;
  435. break;
  436. }
  437. if (sc->thdi != 0 && bcount >= sc->thdi) {
  438. break; /* enough frames found */
  439. }
  440. }
  441. if (bcount < sc->thdi)
  442. continue; /* matching sequence is too short */
  443. if ((double) goodfcount / (double) fcount < sc->thit)
  444. continue;
  445. if ((double) goodfcount*0.5 < FFMAX(gooda, goodb))
  446. continue;
  447. meandist = (double) goodfcount / (double) distsum;
  448. if (meandist < minmeandist ||
  449. status == STATUS_END_REACHED | STATUS_BEGIN_REACHED ||
  450. mode == MODE_FAST){
  451. minmeandist = meandist;
  452. /* bestcandidate in this iteration */
  453. bestmatch.meandist = meandist;
  454. bestmatch.matchframes = bcount;
  455. bestmatch.framerateratio = infos->framerateratio;
  456. bestmatch.score = infos->score;
  457. bestmatch.offset = infos->offset;
  458. bestmatch.first = infos->first;
  459. bestmatch.second = infos->second;
  460. bestmatch.whole = 0; /* will be set to true later */
  461. bestmatch.next = NULL;
  462. }
  463. /* whole sequence is automatically best match */
  464. if (status == (STATUS_END_REACHED | STATUS_BEGIN_REACHED)) {
  465. bestmatch.whole = 1;
  466. break;
  467. }
  468. /* first matching sequence is enough, finding the best one is not necessary */
  469. if (mode == MODE_FAST) {
  470. break;
  471. }
  472. }
  473. return bestmatch;
  474. }
  475. static void sll_free(MatchingInfo *sll)
  476. {
  477. void *tmp;
  478. while (sll) {
  479. tmp = sll;
  480. sll = sll->next;
  481. av_freep(&tmp);
  482. }
  483. }
  484. static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
  485. {
  486. CoarseSignature *cs, *cs2;
  487. MatchingInfo *infos;
  488. MatchingInfo bestmatch;
  489. MatchingInfo *i;
  490. cs = first->coarsesiglist;
  491. cs2 = second->coarsesiglist;
  492. /* score of bestmatch is 0, if no match is found */
  493. bestmatch.score = 0;
  494. bestmatch.meandist = 99999;
  495. bestmatch.whole = 0;
  496. fill_l1distlut(sc->l1distlut);
  497. /* stage 1: coarsesignature matching */
  498. if (find_next_coarsecandidate(sc, second->coarsesiglist, &cs, &cs2, 1) == 0)
  499. return bestmatch; /* no candidate found */
  500. do {
  501. av_log(ctx, AV_LOG_DEBUG, "Stage 1: got coarsesignature pair. "
  502. "indices of first frame: %"PRIu32" and %"PRIu32"\n",
  503. cs->first->index, cs2->first->index);
  504. /* stage 2: l1-distance and hough-transform */
  505. av_log(ctx, AV_LOG_DEBUG, "Stage 2: calculate matching parameters\n");
  506. infos = get_matching_parameters(ctx, sc, cs->first, cs2->first);
  507. if (av_log_get_level() == AV_LOG_DEBUG) {
  508. for (i = infos; i != NULL; i = i->next) {
  509. av_log(ctx, AV_LOG_DEBUG, "Stage 2: matching pair at %"PRIu32" and %"PRIu32", "
  510. "ratio %f, offset %d\n", i->first->index, i->second->index,
  511. i->framerateratio, i->offset);
  512. }
  513. }
  514. /* stage 3: evaluation */
  515. av_log(ctx, AV_LOG_DEBUG, "Stage 3: evaluate\n");
  516. if (infos) {
  517. bestmatch = evaluate_parameters(ctx, sc, infos, bestmatch, mode);
  518. av_log(ctx, AV_LOG_DEBUG, "Stage 3: best matching pair at %"PRIu32" and %"PRIu32", "
  519. "ratio %f, offset %d, score %d, %d frames matching\n",
  520. bestmatch.first->index, bestmatch.second->index,
  521. bestmatch.framerateratio, bestmatch.offset, bestmatch.score, bestmatch.matchframes);
  522. sll_free(infos);
  523. }
  524. } while (find_next_coarsecandidate(sc, second->coarsesiglist, &cs, &cs2, 0) && !bestmatch.whole);
  525. return bestmatch;
  526. }