strInvTransform.c 65 KB


  1. //*@@@+++@@@@******************************************************************
  2. //
  3. // Copyright © Microsoft Corp.
  4. // All rights reserved.
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are met:
  8. //
  9. // • Redistributions of source code must retain the above copyright notice,
  10. // this list of conditions and the following disclaimer.
  11. // • Redistributions in binary form must reproduce the above copyright notice,
  12. // this list of conditions and the following disclaimer in the documentation
  13. // and/or other materials provided with the distribution.
  14. //
  15. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  19. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. // POSSIBILITY OF SUCH DAMAGE.
  26. //
  27. //*@@@---@@@@******************************************************************
  28. #include "strTransform.h"
  29. #include "strcodec.h"
  30. #include "decode.h"
  /** rotation by -pi/8 **/
  /*
   * Two-step lifting rotations. Both macros update their lvalue arguments in place:
   * `a` is updated from `b` first, then `b` is updated from the new `a`.
   * Each argument is evaluated more than once, so pass only side-effect-free lvalues.
   * The whole expansion is parenthesized so that the macro behaves as one expression
   * (its value is the updated `b`) wherever it is used.
   */
  #define IROTATE1(a, b) ((a) -= (((b) + 1) >> 1), (b) += (((a) + 1) >> 1)) // this works well too
  #define IROTATE2(a, b) ((a) -= (((b)*3 + 4) >> 3), (b) += (((a)*3 + 4) >> 3)) // this works well too
  34. /** local functions **/
  35. static Void invOddOdd(PixelI *, PixelI *, PixelI *, PixelI *);
  36. static Void invOddOddPost(PixelI *, PixelI *, PixelI *, PixelI *);
  37. static Void invOdd(PixelI *, PixelI *, PixelI *, PixelI *);
  38. static Void strHSTdec(PixelI *, PixelI *, PixelI *, PixelI *);
  39. static Void strHSTdec1(PixelI *, PixelI *);
  40. static Void strHSTdec1_alternate(PixelI *, PixelI *);
  41. static Void strHSTdec1_edge(PixelI *pa, PixelI *pd);
  42. /** IDCT stuff **/
  43. /** reordering should be combined with zigzag scan **/
  44. /** data order before IDCT **/
  45. /** 0 8 4 6 **/
  46. /** 2 10 14 12 **/
  47. /** 1 11 15 13 **/
  48. /** 9 3 7 5 **/
  49. /** data order after IDCT **/
  50. /** 0 1 2 3 **/
  51. /** 4 5 6 7 **/
  52. /** 8 9 10 11 **/
  53. /** 12 13 14 15 **/
  54. Void strIDCT4x4Stage1(PixelI* p)
  55. {
  56. /** top left corner, butterfly => butterfly **/
  57. strDCT2x2up(p + 0, p + 1, p + 2, p + 3);
  58. /** top right corner, -pi/8 rotation => butterfly **/
  59. invOdd(p + 5, p + 4, p + 7, p + 6);
  60. /** bottom left corner, butterfly => -pi/8 rotation **/
  61. invOdd(p + 10, p + 8, p + 11, p + 9);
  62. /** bottom right corner, -pi/8 rotation => -pi/8 rotation **/
  63. invOddOdd(p + 15, p + 14, p + 13, p + 12);
  64. /** butterfly **/
  65. //FOURBUTTERFLY(p, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
  66. FOURBUTTERFLY_HARDCODED1(p);
  67. }
  68. Void strIDCT4x4Stage2(PixelI* p)
  69. {
  70. /** bottom left corner, butterfly => -pi/8 rotation **/
  71. invOdd(p + 32, p + 48, p + 96, p + 112);
  72. /** top right corner, -pi/8 rotation => butterfly **/
  73. invOdd(p + 128, p + 192, p + 144, p + 208);
  74. /** bottom right corner, -pi/8 rotation => -pi/8 rotation **/
  75. invOddOdd(p + 160, p + 224, p + 176, p + 240);
  76. /** top left corner, butterfly => butterfly **/
  77. strDCT2x2up(p + 0, p + 64, p + 16, p + 80);
  78. /** butterfly **/
  79. FOURBUTTERFLY(p, 0, 192, 48, 240, 64, 128, 112, 176, 16, 208, 32, 224, 80, 144, 96, 160);
  80. }
  81. Void strNormalizeDec(PixelI* p, Bool bChroma)
  82. {
  83. int i;
  84. if (!bChroma) {
  85. //for (i = 0; i < 256; i += 16) {
  86. // p[i] <<= 2;
  87. //}
  88. }
  89. else {
  90. for (i = 0; i < 256; i += 16) {
  91. p[i] += p[i];
  92. }
  93. }
  94. }
  95. /** 2x2 DCT with post-scaling - for use on decoder side **/
  96. Void strDCT2x2dnDec(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  97. {
  98. PixelI a, b, c, d, C, t;
  99. a = *pa;
  100. b = *pb;
  101. C = *pc;
  102. d = *pd;
  103. a += d;
  104. b -= C;
  105. t = ((a - b) >> 1);
  106. c = t - d;
  107. d = t - C;
  108. a -= d;
  109. b += c;
  110. *pa = a * 2;
  111. *pb = b * 2;
  112. *pc = c * 2;
  113. *pd = d * 2;
  114. }
  115. /** post filter stuff **/
  116. /** 2-point post for boundaries **/
  117. Void strPost2(PixelI * a, PixelI * b)
  118. {
  119. *b += ((*a + 4) >> 3);
  120. *a += ((*b + 2) >> 2);
  121. *b += ((*a + 4) >> 3);
  122. }
  123. Void strPost2_alternate(PixelI * pa, PixelI * pb)
  124. {
  125. PixelI a, b;
  126. a = *pa;
  127. b = *pb;
  128. /** rotate **/
  129. b += ((a + 2) >> 2);
  130. a += ((b + 1) >> 1);
  131. a += (b >> 5);
  132. a += (b >> 9);
  133. a += (b >> 13);
  134. b += ((a + 2) >> 2);
  135. *pa = a;
  136. *pb = b;
  137. }
  138. Void strPost2x2(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  139. {
  140. PixelI a, b, c, d;
  141. a = *pa;
  142. b = *pb;
  143. c = *pc;
  144. d = *pd;
  145. /** butterflies **/
  146. a += d;
  147. b += c;
  148. d -= (a + 1) >> 1;
  149. c -= (b + 1) >> 1;
  150. /** rotate **/
  151. b += ((a + 2) >> 2);
  152. a += ((b + 1) >> 1);
  153. b += ((a + 2) >> 2);
  154. /** butterflies **/
  155. d += (a + 1) >> 1;
  156. c += (b + 1) >> 1;
  157. a -= d;
  158. b -= c;
  159. *pa = a;
  160. *pb = b;
  161. *pc = c;
  162. *pd = d;
  163. }
  164. Void strPost2x2_alternate(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  165. {
  166. PixelI a, b, c, d;
  167. a = *pa;
  168. b = *pb;
  169. c = *pc;
  170. d = *pd;
  171. /** butterflies **/
  172. a += d;
  173. b += c;
  174. d -= (a + 1) >> 1;
  175. c -= (b + 1) >> 1;
  176. /** rotate **/
  177. b += ((a + 2) >> 2);
  178. a += ((b + 1) >> 1);
  179. a += (b >> 5);
  180. a += (b >> 9);
  181. a += (b >> 13);
  182. b += ((a + 2) >> 2);
  183. /** butterflies **/
  184. d += (a + 1) >> 1;
  185. c += (b + 1) >> 1;
  186. a -= d;
  187. b -= c;
  188. *pa = a;
  189. *pb = b;
  190. *pc = c;
  191. *pd = d;
  192. }
  193. /** 4-point post for boundaries **/
  194. Void strPost4(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  195. {
  196. PixelI a, b, c, d;
  197. a = *pa;
  198. b = *pb;
  199. c = *pc;
  200. d = *pd;
  201. a += d, b += c;
  202. d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
  203. IROTATE1(c, d);
  204. d += ((a + 1) >> 1), c += ((b + 1) >> 1);
  205. a -= d - ((d * 3 + 16) >> 5), b -= c - ((c * 3 + 16) >> 5);
  206. d += ((a * 3 + 8) >> 4), c += ((b * 3 + 8) >> 4);
  207. a += ((d * 3 + 16) >> 5), b += ((c * 3 + 16) >> 5);
  208. *pa = a;
  209. *pb = b;
  210. *pc = c;
  211. *pd = d;
  212. }
  213. Void strPost4_alternate(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  214. {
  215. PixelI a, b, c, d;
  216. a = *pa;
  217. b = *pb;
  218. c = *pc;
  219. d = *pd;
  220. a += d, b += c;
  221. d -= ((a + 1) >> 1), c -= ((b + 1) >> 1);
  222. strHSTdec1_edge(&a, &d); strHSTdec1_edge(&b, &c);
  223. IROTATE1(c, d);
  224. d += ((a + 1) >> 1), c += ((b + 1) >> 1);
  225. a -= d, b -= c;
  226. *pa = a;
  227. *pb = b;
  228. *pc = c;
  229. *pd = d;
  230. }
  231. /*****************************************************************************************
  232. Input data offsets:
  233. (15)(14)|(10+64)(11+64) p0 (15)(14)|(74)(75)
  234. (13)(12)|( 8+64)( 9+64) (13)(12)|(72)(73)
  235. --------+-------------- --------+--------
  236. ( 5)( 4)|( 0+64) (1+64) p1 ( 5)( 4)|(64)(65)
  237. ( 7)( 6)|( 2+64) (3+64) ( 7)( 6)|(66)(67)
  238. *****************************************************************************************/
  239. Void DCCompensate (PixelI *a, PixelI *b, PixelI *c, PixelI *d, int iDC)
  240. {
  241. iDC = iDC>>1;
  242. *a -= iDC;
  243. *d -= iDC;
  244. *b += iDC;
  245. *c += iDC;
  246. }
  #ifndef max
  #define max(a,b) (((a) > (b)) ? (a) : (b))
  #endif
  #ifndef min
  #define min(a,b) (((a) < (b)) ? (a) : (b))
  #endif
  /**
   * Clips iDCL against iAltDCL.
   *
   * Returns the value of smaller magnitude when both arguments have the same
   * strict sign (min for two positives, max for two negatives); returns 0 when
   * either argument is zero or the signs differ.
   */
  int ClipDCL(int iDCL, int iAltDCL)
  {
      if (iDCL > 0 && iAltDCL > 0)
          return min(iDCL, iAltDCL);

      if (iDCL < 0 && iAltDCL < 0)
          return max(iDCL, iAltDCL);

      return 0;
  }
  270. Void strPost4x4Stage1Split(PixelI *p0, PixelI *p1, Int iOffset, Int iHPQP, Bool bHPAbsent)
  271. {
  272. int iDCLAlt1, iDCLAlt2, iDCLAlt3, iDCLAlt0;
  273. int iDCL1, iDCL2, iDCL3, iDCL0;
  274. int iTmp1, iTmp2, iTmp3, iTmp0;
  275. PixelI *p2 = p0 + 72 - iOffset;
  276. PixelI *p3 = p1 + 64 - iOffset;
  277. p0 += 12;
  278. p1 += 4;
  279. /** buttefly **/
  280. strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
  281. strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
  282. strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
  283. strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
  284. /** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
  285. invOddOddPost(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
  286. /** anti diagonal corners: rotation by -pi/8 **/
  287. IROTATE1(p1[2], p1[3]);
  288. IROTATE1(p1[0], p1[1]);
  289. IROTATE1(p2[1], p2[3]);
  290. IROTATE1(p2[0], p2[2]);
  291. /** butterfly **/
  292. strHSTdec1(p0 + 0, p3 + 0);
  293. strHSTdec1(p0 + 1, p3 + 1);
  294. strHSTdec1(p0 + 2, p3 + 2);
  295. strHSTdec1(p0 + 3, p3 + 3);
  296. strHSTdec(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
  297. strHSTdec(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
  298. strHSTdec(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
  299. strHSTdec(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
  300. iTmp0 = (*(p0 +0) + *(p1 +0) + *(p2 +0) + *(p3 +0))>>1;
  301. iTmp1 = (*(p0 +1) + *(p1 +1) + *(p2 +1) + *(p3 +1))>>1;
  302. iTmp2 = (*(p0 +2) + *(p1 +2) + *(p2 +2) + *(p3 +2))>>1;
  303. iTmp3 = (*(p0 +3) + *(p1 +3) + *(p2 +3) + *(p3 +3))>>1;
  304. iDCL0 = (iTmp0 * 595 + 65536)>>17; //Approximating 27/5947
  305. iDCL1 = (iTmp1 * 595 + 65536)>>17;
  306. iDCL2 = (iTmp2 * 595 + 65536)>>17;
  307. iDCL3 = (iTmp3 * 595 + 65536)>>17;
  308. if ((abs(iDCL0) < iHPQP && iHPQP > 20) || bHPAbsent) {
  309. iDCLAlt0 = (*(p0 +0) - *(p1 +0) - *(p2 +0) + *(p3 +0))>>1;
  310. iDCL0 = ClipDCL (iDCL0, iDCLAlt0);
  311. DCCompensate (p0 + 0, p2 + 0, p1 + 0, p3 + 0, iDCL0);
  312. }
  313. if ((abs(iDCL1) < iHPQP && iHPQP > 20) || bHPAbsent) {
  314. iDCLAlt1 = (*(p0 +1) - *(p1 +1) - *(p2 +1) + *(p3 +1))>>1;
  315. iDCL1 = ClipDCL (iDCL1, iDCLAlt1);
  316. DCCompensate (p0 + 1, p2 + 1, p1 + 1, p3 + 1, iDCL1);
  317. }
  318. if ((abs(iDCL2) < iHPQP && iHPQP > 20) || bHPAbsent) {
  319. iDCLAlt2 = (*(p0 +2) - *(p1 +2) - *(p2 +2) + *(p3 +2))>>1;
  320. iDCL2 = ClipDCL (iDCL2, iDCLAlt2);
  321. DCCompensate (p0 + 2, p2 + 2, p1 + 2, p3 + 2, iDCL2);
  322. }
  323. if ((abs(iDCL3) < iHPQP && iHPQP > 20) || bHPAbsent) {
  324. iDCLAlt3 = (*(p0 +3) - *(p1 +3) - *(p2 +3) + *(p3 +3))>>1;
  325. iDCL3 = ClipDCL (iDCL3, iDCLAlt3);
  326. DCCompensate (p0 + 3, p2 + 3, p1 + 3, p3 + 3, iDCL3);
  327. }
  328. }
  329. Void strPost4x4Stage1(PixelI* p, Int iOffset, Int iHPQP, Bool bHPAbsent)
  330. {
  331. strPost4x4Stage1Split(p, p + 16, iOffset, iHPQP, bHPAbsent);
  332. }
  333. Void strPost4x4Stage1Split_alternate(PixelI *p0, PixelI *p1, Int iOffset)
  334. {
  335. PixelI *p2 = p0 + 72 - iOffset;
  336. PixelI *p3 = p1 + 64 - iOffset;
  337. p0 += 12;
  338. p1 += 4;
  339. /** buttefly **/
  340. strDCT2x2dn(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
  341. strDCT2x2dn(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
  342. strDCT2x2dn(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
  343. strDCT2x2dn(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
  344. /** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
  345. invOddOddPost(p3 + 0, p3 + 1, p3 + 2, p3 + 3);
  346. /** anti diagonal corners: rotation by -pi/8 **/
  347. IROTATE1(p1[2], p1[3]);
  348. IROTATE1(p1[0], p1[1]);
  349. IROTATE1(p2[1], p2[3]);
  350. IROTATE1(p2[0], p2[2]);
  351. /** butterfly **/
  352. strHSTdec1_alternate(p0 + 0, p3 + 0);
  353. strHSTdec1_alternate(p0 + 1, p3 + 1);
  354. strHSTdec1_alternate(p0 + 2, p3 + 2);
  355. strHSTdec1_alternate(p0 + 3, p3 + 3);
  356. strHSTdec(p0 + 0, p2 + 0, p1 + 0, p3 + 0);
  357. strHSTdec(p0 + 1, p2 + 1, p1 + 1, p3 + 1);
  358. strHSTdec(p0 + 2, p2 + 2, p1 + 2, p3 + 2);
  359. strHSTdec(p0 + 3, p2 + 3, p1 + 3, p3 + 3);
  360. }
  361. Void strPost4x4Stage1_alternate(PixelI* p, Int iOffset)
  362. {
  363. strPost4x4Stage1Split_alternate(p, p + 16, iOffset);
  364. }
  365. /*****************************************************************************************
  366. Input data offsets:
  367. (15)(14)|(10+32)(11+32) p0 (15)(14)|(42)(43)
  368. (13)(12)|( 8+32)( 9+32) (13)(12)|(40)(41)
  369. --------+-------------- --------+--------
  370. ( 5)( 4)|( 0+32) (1+32) p1 ( 5)( 4)|(32)(33)
  371. ( 7)( 6)|( 2+32) (3+32) ( 7)( 6)|(34)(35)
  372. *****************************************************************************************/
  373. /*****************************************************************************************
  374. Input data offsets:
  375. ( -96)(-32)|(32)( 96) p0
  376. ( -80)(-16)|(48)(112)
  377. -----------+------------
  378. (-128)(-64)|( 0)( 64) p1
  379. (-112)(-48)|(16)( 80)
  380. *****************************************************************************************/
  381. Void strPost4x4Stage2Split(PixelI* p0, PixelI* p1)
  382. {
  383. /** buttefly **/
  384. strDCT2x2dn(p0 - 96, p0 + 96, p1 - 112, p1 + 80);
  385. strDCT2x2dn(p0 - 32, p0 + 32, p1 - 48, p1 + 16);
  386. strDCT2x2dn(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
  387. strDCT2x2dn(p0 - 16, p0 + 48, p1 - 64, p1 + 0);
  388. /** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
  389. invOddOddPost(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  390. /** anti diagonal corners: rotation by -pi/8 **/
  391. IROTATE1(p0[ 48], p0[ 32]);
  392. IROTATE1(p0[112], p0[ 96]);
  393. IROTATE1(p1[-64], p1[-128]);
  394. IROTATE1(p1[-48], p1[-112]);
  395. /** butterfly **/
  396. strHSTdec1(p0 - 96, p1 + 80);
  397. strHSTdec1(p0 - 32, p1 + 16);
  398. strHSTdec1(p0 - 80, p1 + 64);
  399. strHSTdec1(p0 - 16, p1 + 0);
  400. strHSTdec(p0 - 96, p1 - 112, p0 + 96, p1 + 80);
  401. strHSTdec(p0 - 32, p1 - 48, p0 + 32, p1 + 16);
  402. strHSTdec(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
  403. strHSTdec(p0 - 16, p1 - 64, p0 + 48, p1 + 0);
  404. }
  405. Void strPost4x4Stage2Split_alternate(PixelI* p0, PixelI* p1)
  406. {
  407. /** buttefly **/
  408. strDCT2x2dn(p0 - 96, p0 + 96, p1 - 112, p1 + 80);
  409. strDCT2x2dn(p0 - 32, p0 + 32, p1 - 48, p1 + 16);
  410. strDCT2x2dn(p0 - 80, p0 + 112, p1 - 128, p1 + 64);
  411. strDCT2x2dn(p0 - 16, p0 + 48, p1 - 64, p1 + 0);
  412. /** bottom right corner: -pi/8 rotation => -pi/8 rotation **/
  413. invOddOddPost(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  414. /** anti diagonal corners: rotation by -pi/8 **/
  415. IROTATE1(p0[ 48], p0[ 32]);
  416. IROTATE1(p0[112], p0[ 96]);
  417. IROTATE1(p1[-64], p1[-128]);
  418. IROTATE1(p1[-48], p1[-112]);
  419. /** butterfly **/
  420. strHSTdec1_alternate(p0 - 96, p1 + 80);
  421. strHSTdec1_alternate(p0 - 32, p1 + 16);
  422. strHSTdec1_alternate(p0 - 80, p1 + 64);
  423. strHSTdec1_alternate(p0 - 16, p1 + 0);
  424. strHSTdec(p0 - 96, p1 - 112, p0 + 96, p1 + 80);
  425. strHSTdec(p0 - 32, p1 - 48, p0 + 32, p1 + 16);
  426. strHSTdec(p0 - 80, p1 - 128, p0 + 112, p1 + 64);
  427. strHSTdec(p0 - 16, p1 - 64, p0 + 48, p1 + 0);
  428. }
  429. /**
  430. Hadamard+Scale transform
  431. for some strange reason, breaking up the function into two blocks, strHSTdec1 and strHSTdec
  432. seems to work faster
  433. **/
  434. static Void strHSTdec1(PixelI *pa, PixelI *pd)
  435. {
  436. /** different realization : does rescaling as well! **/
  437. PixelI a, d;
  438. a = *pa;
  439. d = *pd;
  440. a += d;
  441. d = (a >> 1) - d;
  442. a += (d * 3 + 0) >> 3;
  443. d += (a * 3 + 0) >> 4;
  444. //a += (d * 3 + 4) >> 3;
  445. *pa = a;
  446. *pd = d;
  447. }
  448. static Void strHSTdec1_alternate(PixelI *pa, PixelI *pd)
  449. {
  450. /** different realization : does rescaling as well! **/
  451. PixelI a, d;
  452. a = *pa;
  453. d = *pd;
  454. a += d;
  455. d = (a >> 1) - d;
  456. a += (d * 3 + 0) >> 3;
  457. d += (a * 3 + 0) >> 4;
  458. //a += (d * 3 + 4) >> 3;
  459. d += (a >> 7);
  460. d -= (a >> 10);
  461. *pa = a;
  462. *pd = d;
  463. }
  464. static Void strHSTdec1_edge (PixelI *pa, PixelI *pd)
  465. {
  466. /** different realization as compared to scaling operator for 2D case **/
  467. PixelI a, d;
  468. a = *pa;
  469. d = *pd;
  470. a += d;
  471. d = (a >> 1) - d;
  472. a += (d * 3 + 0) >> 3;
  473. d += (a * 3 + 0) >> 4;
  474. //Scaling modification of adding 7/1024 in 2 steps (without multiplication by 7).
  475. d += (a >> 7);
  476. d -= (a >> 10);
  477. a += (d * 3 + 4) >> 3;
  478. d -= (a >> 1);
  479. a += d;
  480. // End new operations
  481. *pa = a;
  482. *pd = -d; // Negative sign needed here for 1D scaling case to ensure correct scaling.
  483. }
  484. static Void strHSTdec(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  485. {
  486. /** different realization : does rescaling as well! **/
  487. PixelI a, b, c, d;
  488. a = *pa;
  489. b = *pb;
  490. c = *pc;
  491. d = *pd;
  492. b -= c;
  493. a += (d * 3 + 4) >> 3;
  494. d -= (b >> 1);
  495. c = ((a - b) >> 1) - c;
  496. *pc = d;
  497. *pd = c;
  498. *pa = a - c, *pb = b + d;
  499. }
  500. /** Kron(Rotate(pi/8), Rotate(pi/8)) **/
  501. static Void invOddOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  502. {
  503. PixelI a, b, c, d, t1, t2;
  504. a = *pa;
  505. b = *pb;
  506. c = *pc;
  507. d = *pd;
  508. /** butterflies **/
  509. d += a;
  510. c -= b;
  511. a -= (t1 = d >> 1);
  512. b += (t2 = c >> 1);
  513. /** rotate pi/4 **/
  514. a -= (b * 3 + 3) >> 3;
  515. b += (a * 3 + 3) >> 2;
  516. a -= (b * 3 + 4) >> 3;
  517. /** butterflies **/
  518. b -= t2;
  519. a += t1;
  520. c += b;
  521. d -= a;
  522. /** sign flips **/
  523. *pa = a;
  524. *pb = -b;
  525. *pc = -c;
  526. *pd = d;
  527. }
  528. /** Kron(Rotate(pi/8), Rotate(pi/8)) **/
  529. static Void invOddOddPost(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  530. {
  531. PixelI a, b, c, d, t1, t2;
  532. a = *pa;
  533. b = *pb;
  534. c = *pc;
  535. d = *pd;
  536. /** butterflies **/
  537. d += a;
  538. c -= b;
  539. a -= (t1 = d >> 1);
  540. b += (t2 = c >> 1);
  541. /** rotate pi/4 **/
  542. a -= (b * 3 + 6) >> 3;
  543. b += (a * 3 + 2) >> 2;
  544. a -= (b * 3 + 4) >> 3;
  545. /** butterflies **/
  546. b -= t2;
  547. a += t1;
  548. c += b;
  549. d -= a;
  550. *pa = a;
  551. *pb = b;
  552. *pc = c;
  553. *pd = d;
  554. }
  555. /** Kron(Rotate(-pi/8), [1 1; 1 -1]/sqrt(2)) **/
  556. /** [D C A B] => [a b c d] **/
  557. Void invOdd(PixelI *pa, PixelI *pb, PixelI *pc, PixelI *pd)
  558. {
  559. PixelI a, b, c, d;
  560. a = *pa;
  561. b = *pb;
  562. c = *pc;
  563. d = *pd;
  564. /** butterflies **/
  565. b += d;
  566. a -= c;
  567. d -= (b) >> 1;
  568. c += (a + 1) >> 1;
  569. /** rotate pi/8 **/
  570. IROTATE2(a, b);
  571. IROTATE2(c, d);
  572. /** butterflies **/
  573. c -= (b + 1) >> 1;
  574. d = ((a + 1) >> 1) - d;
  575. b += c;
  576. a -= d;
  577. *pa = a;
  578. *pb = b;
  579. *pc = c;
  580. *pd = d;
  581. }
  582. /*************************************************************************
  583. Top-level function to inverse transform possible part of a macroblock
  584. *************************************************************************/
  /*
   * Applies the inverse transform and, depending on the overlap mode, the inverse
   * overlap (post) operators to the macroblock buffers pSC->p0MBbuffer[] and
   * pSC->p1MBbuffer[].
   *
   * pSC  codec state; position (cColumn, cRow), overlap mode, colour format,
   *      thumbnail scale and post-processing strength are all read from it.
   *
   * Edge flags: left/top are set when cColumn/cRow is 0; right/bottom are set when
   * cColumn/cRow equals cmbWidth/cmbHeight. Most steps are skipped or replaced by
   * boundary variants according to these flags.
   *
   * Channels: the first loop covers iChannels channels (1 for YUV_420 / YUV_422,
   * otherwise cNumChannels); buffers 1 and 2 of YUV_420 / YUV_422 are handled by the
   * two later loops. All three loops are skipped when the thumbnail scale is >= 16,
   * and the first-level stages are bypassed when it is >= 4.
   *
   * Returns ICERR_OK unconditionally.
   */
  585. Int invTransformMacroblock(CWMImageStrCodec * pSC)
  586. {
  587. const OVERLAP olOverlap = pSC->WMISCP.olOverlap;
  588. const COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
  589. // const BITDEPTH_BITS bdBitDepth = pSC->WMII.bdBitDepth;
  590. const Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
  591. const Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
  592. const Bool topORbottom = (top || bottom), leftORright = (left || right);
  593. const Bool topORleft = (top || left), bottomORright = (bottom || right);
  594. const size_t mbWidth = pSC->cmbWidth, mbX = pSC->cColumn;
  595. PixelI * p = NULL;// * pt = NULL;
  596. size_t i;
  /* chroma of YUV_420 / YUV_422 is processed by the dedicated loops further down */
  597. const size_t iChannels = (cfColorFormat == YUV_420 || cfColorFormat == YUV_422) ? 1 : pSC->m_param.cNumChannels;
  598. const size_t tScale = pSC->m_Dparam->cThumbnailScale;
  599. Int j = 0;
  /* qp[] and dcqp[] are filled below only when cPostProcStrength > 0, and are read only under that same condition */
  600. Int qp[MAX_CHANNELS], dcqp[MAX_CHANNELS], iStrength = (1 << pSC->WMII.cPostProcStrength);
  601. // ERR_CODE result = ICERR_OK;
  602. Bool bHPAbsent = (pSC->WMISCP.sbSubband == SB_NO_HIGHPASS || pSC->WMISCP.sbSubband == SB_DC_ONLY);
  603. if(pSC->WMII.cPostProcStrength > 0){
  604. // threshold for post processing
  605. for(i = 0; i < iChannels; i ++){
  606. qp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerLP[i][pSC->MBInfo.iQIndexLP].iQP * iStrength * (olOverlap == OL_NONE ? 2 : 1);
  607. dcqp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerDC[i][0].iQP * iStrength;
  608. }
  609. if(left) // a new MB row
  610. slideOneMBRow(pSC->pPostProcInfo, pSC->m_param.cNumChannels, mbWidth, top, bottom); // previous current row becomes previous row
  611. }
  612. //================================================================
  613. // 400_Y, 444_YUV
  614. for (i = 0; i < iChannels && tScale < 16; ++i)
  615. {
  616. PixelI* const p0 = pSC->p0MBbuffer[i];
  617. PixelI* const p1 = pSC->p1MBbuffer[i];
  /* iHPQP keeps the default 255 when no high-pass band is present */
  618. Int iHPQP = 255;
  619. if (!bHPAbsent)
  620. iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
  621. //================================
  622. // second level inverse transform
  623. if (!bottomORright)
  624. {
  625. if(pSC->WMII.cPostProcStrength > 0)
  626. updatePostProcInfo(pSC->pPostProcInfo, p1, mbX, i); // update postproc info before IDCT
  627. strIDCT4x4Stage2(p1);
  628. if (pSC->m_param.bScaledArith) {
  /* second argument is bChroma: true for every channel except the first */
  629. strNormalizeDec(p1, (i != 0));
  630. }
  631. }
  632. //================================
  633. // second level inverse overlap
  634. if (OL_TWO == olOverlap)
  635. {
  636. if (leftORright && (!topORbottom))
  637. {
  /* at the right edge the operands are shifted back by 128 elements */
  638. j = left ? 0 : -128;
  639. strPost4(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
  640. strPost4(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
  641. }
  642. if (!leftORright)
  643. {
  644. if (topORbottom)
  645. {
  646. p = top ? p1 : p0 + 32;
  647. strPost4(p - 128, p - 64, p + 0, p + 64);
  648. strPost4(p - 112, p - 48, p + 16, p + 80);
  649. p = NULL;
  650. }
  651. else
  652. {
  653. strPost4x4Stage2Split(p0, p1);
  654. }
  655. }
  656. }
  657. if(pSC->WMII.cPostProcStrength > 0)
  658. postProcMB(pSC->pPostProcInfo, p0, p1, mbX, i, dcqp[i]); // second stage deblocking
  659. //================================
  660. // first level inverse transform
  661. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  662. continue;
  663. if (!top)
  664. {
  665. for (j = (left ? 32 : -96); j < (right ? 32 : 160); j += 64)
  666. {
  667. strIDCT4x4Stage1(p0 + j + 0);
  668. strIDCT4x4Stage1(p0 + j + 16);
  669. }
  670. }
  671. if (!bottom)
  672. {
  673. for (j = (left ? 0 : -128); j < (right ? 0 : 128); j += 64)
  674. {
  675. strIDCT4x4Stage1(p1 + j + 0);
  676. strIDCT4x4Stage1(p1 + j + 16);
  677. }
  678. }
  679. //================================
  680. // first level inverse overlap
  681. if (OL_NONE != olOverlap)
  682. {
  683. if (leftORright)
  684. {
  685. j = left ? 0 + 10 : -64 + 14;
  686. if (!top)
  687. {
  688. p = p0 + 16 + j;
  689. strPost4(p + 0, p - 2, p + 6, p + 8);
  690. strPost4(p + 1, p - 1, p + 7, p + 9);
  691. strPost4(p + 16, p + 14, p + 22, p + 24);
  692. strPost4(p + 17, p + 15, p + 23, p + 25);
  693. p = NULL;
  694. }
  695. if (!bottom)
  696. {
  697. p = p1 + j;
  698. strPost4(p + 0, p - 2, p + 6, p + 8);
  699. strPost4(p + 1, p - 1, p + 7, p + 9);
  700. p = NULL;
  701. }
  702. if (!topORbottom)
  703. {
  704. strPost4(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
  705. strPost4(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
  706. }
  707. }
  708. if (top)
  709. {
  710. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  711. {
  712. p = p1 + j;
  713. strPost4(p + 5, p + 4, p + 64, p + 65);
  714. strPost4(p + 7, p + 6, p + 66, p + 67);
  715. p = NULL;
  716. strPost4x4Stage1(p1 + j, 0, iHPQP, bHPAbsent);
  717. }
  718. }
  719. else if (bottom)
  720. {
  721. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  722. {
  723. strPost4x4Stage1(p0 + 16 + j, 0, iHPQP, bHPAbsent);
  724. strPost4x4Stage1(p0 + 32 + j, 0, iHPQP, bHPAbsent);
  725. p = p0 + 48 + j;
  726. strPost4(p + 15, p + 14, p + 74, p + 75);
  727. strPost4(p + 13, p + 12, p + 72, p + 73);
  728. p = NULL;
  729. }
  730. }
  731. else
  732. {
  733. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  734. {
  735. strPost4x4Stage1(p0 + 16 + j, 0, iHPQP, bHPAbsent);
  736. strPost4x4Stage1(p0 + 32 + j, 0, iHPQP, bHPAbsent);
  /* this block straddles the two buffers, hence the Split variant */
  737. strPost4x4Stage1Split(p0 + 48 + j, p1 + j, 0, iHPQP, bHPAbsent);
  738. strPost4x4Stage1(p1 + j, 0, iHPQP, bHPAbsent);
  739. }
  740. }
  741. }
  742. if(pSC->WMII.cPostProcStrength > 0 && (!topORleft))
  743. postProcBlock(pSC->pPostProcInfo, p0, p1, mbX, i, qp[i]); // destairing and first stage deblocking
  744. }
  745. //================================================================
  746. // 420_UV
  /* runs twice (i = 0, 1) for YUV_420 and not at all for any other format */
  747. for (i = 0; i < (YUV_420 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
  748. {
  749. PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
  750. PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
  751. Int iHPQP = 255;
  /* NOTE(review): pQuantizerHP is indexed with i (0/1) while the buffers above use 1 + i -- confirm this is intended */
  752. if (!bHPAbsent)
  753. iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
  754. //========================================
  755. // second level inverse transform (420_UV)
  756. if (!bottomORright)
  757. {
  758. if (!pSC->m_param.bScaledArith) {
  759. strDCT2x2dn(p1, p1 + 32, p1 + 16, p1 + 48);
  760. }
  761. else {
  762. strDCT2x2dnDec(p1, p1 + 32, p1 + 16, p1 + 48);
  763. }
  764. }
  765. //========================================
  766. // second level inverse overlap (420_UV)
  767. if (OL_TWO == olOverlap)
  768. {
  769. if (leftORright && !topORbottom)
  770. {
  771. j = (left ? 0 : -32);
  772. strPost2(p0 + j + 16, p1 + j);
  773. }
  774. if (!leftORright)
  775. {
  776. if (topORbottom)
  777. {
  778. p = (top ? p1 : p0 + 16);
  779. strPost2(p - 32, p);
  780. p = NULL;
  781. }
  782. else{
  783. strPost2x2(p0 - 16, p0 + 16, p1 - 32, p1);
  784. }
  785. }
  786. }
  787. //========================================
  788. // first level inverse transform (420_UV)
  789. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  790. continue;
  791. if (!top)
  792. {
  793. for (j = (left ? 16 : -16); j < (right ? 16 : 48); j += 32)
  794. {
  795. strIDCT4x4Stage1(p0 + j);
  796. }
  797. }
  798. if (!bottom)
  799. {
  800. for (j = (left ? 0 : -32); j < (right ? 0 : 32); j += 32)
  801. {
  802. strIDCT4x4Stage1(p1 + j);
  803. }
  804. }
  805. //========================================
  806. // first level inverse overlap (420_UV)
  807. if (OL_NONE != olOverlap)
  808. {
  809. if(!left && !top)
  810. {
  811. if (bottom)
  812. {
  813. for (j = -48; j < (right ? -16 : 16); j += 32)
  814. {
  815. p = p0 + j;
  816. strPost4(p + 15, p + 14, p + 42, p + 43);
  817. strPost4(p + 13, p + 12, p + 40, p + 41);
  818. p = NULL;
  819. }
  820. }
  821. else
  822. {
  823. for (j = -48; j < (right ? -16 : 16); j += 32)
  824. {
  /* iOffset 32 here, where the first (400_Y / 444_YUV) loop passes 0 */
  825. strPost4x4Stage1Split(p0 + j, p1 - 16 + j, 32, iHPQP, bHPAbsent);
  826. }
  827. }
  828. if (right)
  829. {
  830. if (!bottom)
  831. {
  832. strPost4(p0 - 2 , p0 - 4 , p1 - 28, p1 - 26);
  833. strPost4(p0 - 1 , p0 - 3 , p1 - 27, p1 - 25);
  834. }
  835. strPost4(p0 - 18, p0 - 20, p0 - 12, p0 - 10);
  836. strPost4(p0 - 17, p0 - 19, p0 - 11, p0 - 9);
  837. }
  838. else
  839. {
  840. strPost4x4Stage1(p0 - 32, 32, iHPQP, bHPAbsent);
  841. }
  842. strPost4x4Stage1(p0 - 64, 32, iHPQP, bHPAbsent);
  843. }
  844. else if (top)
  845. {
  846. for (j = (left ? 0: -64); j < (right ? -32: 0); j += 32)
  847. {
  848. p = p1 + j + 4;
  849. strPost4(p + 1, p + 0, p + 28, p + 29);
  850. strPost4(p + 3, p + 2, p + 30, p + 31);
  851. p = NULL;
  852. }
  853. }
  854. else if (left)
  855. {
  856. if (!bottom)
  857. {
  858. strPost4(p0 + 26, p0 + 24, p1 + 0, p1 + 2);
  859. strPost4(p0 + 27, p0 + 25, p1 + 1, p1 + 3);
  860. }
  861. strPost4(p0 + 10, p0 + 8, p0 + 16, p0 + 18);
  862. strPost4(p0 + 11, p0 + 9, p0 + 17, p0 + 19);
  863. }
  864. }
  865. }
  866. //================================================================
  867. // 422_UV
  /* runs twice (i = 0, 1) for YUV_422 and not at all for any other format */
  868. for (i = 0; i < (YUV_422 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
  869. {
  870. PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
  871. PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
  872. Int iHPQP = 255;
  /* NOTE(review): pQuantizerHP is indexed with i (0/1) while the buffers above use 1 + i -- confirm this is intended */
  873. if (!bHPAbsent)
  874. iHPQP = pSC->pTile[pSC->cTileColumn].pQuantizerHP[i][pSC->MBInfo.iQIndexHP].iQP;
  875. //========================================
  876. // second level inverse transform (422_UV)
  /* the cThumbnailScale < 16 test repeats the loop condition (tScale holds the same field) */
  877. if ((!bottomORright) && pSC->m_Dparam->cThumbnailScale < 16)
  878. {
  879. // 1D lossless HT
  880. p1[0] -= ((p1[32] + 1) >> 1);
  881. p1[32] += p1[0];
  882. if (!pSC->m_param.bScaledArith) {
  883. strDCT2x2dn(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  884. strDCT2x2dn(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
  885. }
  886. else {
  887. strDCT2x2dnDec(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  888. strDCT2x2dnDec(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
  889. }
  890. }
  891. //========================================
  892. // second level inverse overlap (422_UV)
  893. if (OL_TWO == olOverlap)
  894. {
  895. if (!bottom)
  896. {
  897. if (leftORright)
  898. {
  899. if (!top)
  900. {
  901. j = (left ? 0 : -64);
  902. strPost2(p0 + 48 + j, p1 + j);
  903. }
  904. j = (left ? 16 : -48);
  905. strPost2(p1 + j, p1 + j + 16);
  906. }
  907. else
  908. {
  909. if (top)
  910. {
  911. strPost2(p1 - 64, p1);
  912. }
  913. else
  914. {
  915. strPost2x2(p0 - 16, p0 + 48, p1 - 64, p1);
  916. }
  917. strPost2x2(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
  918. }
  919. }
  920. else if (!leftORright)
  921. {
  922. strPost2(p0 - 16, p0 + 48);
  923. }
  924. }
  925. //========================================
  926. // first level inverse transform (422_UV)
  927. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  928. continue;
  929. if (!top)
  930. {
  931. for (j = (left ? 48 : -16); j < (right ? 48 : 112); j += 64)
  932. {
  933. strIDCT4x4Stage1(p0 + j);
  934. }
  935. }
  936. if (!bottom)
  937. {
  938. for (j = (left ? 0 : -64); j < (right ? 0 : 64); j += 64)
  939. {
  940. strIDCT4x4Stage1(p1 + j + 0);
  941. strIDCT4x4Stage1(p1 + j + 16);
  942. strIDCT4x4Stage1(p1 + j + 32);
  943. }
  944. }
  945. //========================================
  946. // first level inverse overlap (422_UV)
  947. if (OL_NONE != olOverlap)
  948. {
  949. if (!top)
  950. {
  951. if (leftORright)
  952. {
  953. j = (left ? 32 + 10 : -32 + 14);
  954. p = p0 + j;
  955. strPost4(p + 0, p - 2, p + 6, p + 8);
  956. strPost4(p + 1, p - 1, p + 7, p + 9);
  957. p = NULL;
  958. }
  959. for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
  960. {
  961. strPost4x4Stage1(p0 + j + 32, 0, iHPQP, bHPAbsent);
  962. }
  963. }
  964. if (!bottom)
  965. {
  966. if (leftORright)
  967. {
  968. j = (left ? 0 + 10 : -64 + 14);
  969. p = p1 + j;
  970. strPost4(p + 0, p - 2, p + 6, p + 8);
  971. strPost4(p + 1, p - 1, p + 7, p + 9);
  972. p += 16;
  973. strPost4(p + 0, p - 2, p + 6, p + 8);
  974. strPost4(p + 1, p - 1, p + 7, p + 9);
  975. p = NULL;
  976. }
  977. for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
  978. {
  979. strPost4x4Stage1(p1 + j + 0, 0, iHPQP, bHPAbsent);
  980. strPost4x4Stage1(p1 + j + 16, 0, iHPQP, bHPAbsent);
  981. }
  982. }
  983. if (topORbottom)
  984. {
  985. p = (top ? p1 + 5 : p0 + 48 + 13);
  986. for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
  987. {
  988. strPost4(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
  989. strPost4(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
  990. }
  991. p = NULL;
  992. }
  993. else
  994. {
  995. if (leftORright)
  996. {
  997. j = (left ? 0 + 0 : -64 + 4);
  998. strPost4(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
  999. strPost4(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
  1000. }
  1001. for (j = (left ? 0 : -128); j < (right ? -64 : 0); j += 64)
  1002. {
  1003. strPost4x4Stage1Split(p0 + j + 48, p1 + j + 0, 0, iHPQP, bHPAbsent);
  1004. }
  1005. }
  1006. }
  1007. }
  /* no failure path exists in this function */
  1008. return ICERR_OK;
  1009. }
  // invTransformMacroblock_alteredOperators_hard
  //
  // Inverse-transform step for the macroblock position (pSC->cColumn, pSC->cRow)
  // built on the "_alternate" overlap operators (strPost4_alternate,
  // strPost2_alternate, strPost4x4Stage1_alternate, ...). When
  // pSC->WMISCP.bUseHardTileBoundaries is set, the overlap stages OR the
  // tile-boundary flags into most of their image-edge tests.
  //
  // Work done, in order:
  //   1. refresh the tile-boundary flags kept in pSC (or clear them when hard
  //      tile boundaries are off) and record the current position in mbX/mbY;
  //   2. if pSC->WMII.cPostProcStrength > 0, compute per-channel post-processing
  //      thresholds and, on column 0, call slideOneMBRow();
  //   3. process channel(s) of the 400_Y / 444_YUV style layout;
  //   4. process the two chroma planes when the color format is YUV_420;
  //   5. process the two chroma planes when the color format is YUV_422.
  // Each of 3-5 runs: second-level inverse transform, second-level inverse
  // overlap (OL_TWO only), first-level inverse transform, first-level inverse
  // overlap (any overlap mode other than OL_NONE).
  //
  // Thumbnail handling: tScale >= 16 skips all three channel loops; tScale >= 4
  // leaves each loop iteration right after the second-level stage.
  //
  // Parameters:
  //   pSC - codec state. Read for geometry, parameters and MB buffers; written
  //         through the mbX/mbY/tileX/tileY/boundary-flag/iPredBefore/iPredAfter
  //         fields. Buffer samples are modified in place (directly in the 422
  //         path; presumably also by the str* helpers, whose definitions are
  //         outside this view).
  // Returns: ICERR_OK on every path visible here.
  1010. Int invTransformMacroblock_alteredOperators_hard(CWMImageStrCodec * pSC)
  1011. {
  1012. const OVERLAP olOverlap = pSC->WMISCP.olOverlap;
  1013. const COLORFORMAT cfColorFormat = pSC->m_param.cfColorFormat;
  1014. // const BITDEPTH_BITS bdBitDepth = pSC->WMII.bdBitDepth;
  // Note that right/bottom compare against cmbWidth/cmbHeight themselves while
  // rightAdjacentColumn uses cmbWidth - 1.
  // NOTE(review): presumably the caller visits one position past the last
  // MB column/row so trailing edges can be finished - confirm against the caller.
  1015. const Bool left = (pSC->cColumn == 0), right = (pSC->cColumn == pSC->cmbWidth);
  1016. const Bool top = (pSC->cRow == 0), bottom = (pSC->cRow == pSC->cmbHeight);
  1017. const Bool topORbottom = (top || bottom), leftORright = (left || right);
  1018. const Bool topORleft = (top || left), bottomORright = (bottom || right);
  1019. Bool leftAdjacentColumn = (pSC->cColumn == 1), rightAdjacentColumn = (pSC->cColumn == pSC->cmbWidth - 1);
  1020. // Bool topAdjacentRow = (pSC->cRow == 1), bottomAdjacentRow = (pSC->cRow == pSC->cmbHeight - 1);
  1021. const size_t mbWidth = pSC->cmbWidth;
  1022. PixelI * p = NULL;// * pt = NULL;
  1023. size_t i;
  // For YUV_420 / YUV_422 only channel 0 goes through the first channel loop;
  // the remaining planes are handled by the dedicated 420_UV / 422_UV loops.
  1024. const size_t iChannels = (cfColorFormat == YUV_420 || cfColorFormat == YUV_422) ? 1 : pSC->m_param.cNumChannels;
  1025. const size_t tScale = pSC->m_Dparam->cThumbnailScale;
  1026. Int j = 0;
  // qp[] and dcqp[] are filled (for i < iChannels) only when
  // cPostProcStrength > 0, and are read only under that same condition.
  1027. Int qp[MAX_CHANNELS], dcqp[MAX_CHANNELS], iStrength = (1 << pSC->WMII.cPostProcStrength);
  1028. // ERR_CODE result = ICERR_OK;
  // Shorthand for fields of pSC. These macros are defined mid-function and
  // nothing inside this function #undef's them.
  1029. #define mbX pSC->mbX
  1030. #define mbY pSC->mbY
  1031. #define tileX pSC->tileX
  1032. #define tileY pSC->tileY
  1033. #define bVertTileBoundary pSC->bVertTileBoundary
  1034. #define bHoriTileBoundary pSC->bHoriTileBoundary
  1035. #define bOneMBLeftVertTB pSC->bOneMBLeftVertTB
  1036. #define bOneMBRightVertTB pSC->bOneMBRightVertTB
  1037. #define iPredBefore pSC->iPredBefore
  1038. #define iPredAfter pSC->iPredAfter
  // Tile bookkeeping. The column position is tested against uiTileY[] /
  // cNumOfSliceMinus1H (index tileY) and the row position against uiTileX[] /
  // cNumOfSliceMinus1V (index tileX).
  // NOTE(review): confirm this X/Y pairing matches how those arrays are filled
  // elsewhere; their definitions are not visible here.
  1039. if (pSC->WMISCP.bUseHardTileBoundaries) {
  1040. //Add tile location information
  1041. if (pSC->cColumn == 0) {
  1042. bVertTileBoundary = FALSE;
  1043. tileY = 0;
  1044. }
  1045. bOneMBLeftVertTB = bOneMBRightVertTB = FALSE;
  // Current column is exactly one past the position stored for the current tile index.
  1046. if(tileY > 0 && tileY <= pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn - 1) == pSC->WMISCP.uiTileY[tileY])
  1047. bOneMBRightVertTB = TRUE;
  // Current column equals the next stored position: flag it and advance tileY.
  1048. if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && pSC->cColumn == pSC->WMISCP.uiTileY[tileY + 1]) {
  1049. bVertTileBoundary = TRUE;
  1050. tileY++;
  1051. }
  1052. else
  1053. bVertTileBoundary = FALSE;
  // Evaluated with the (possibly just incremented) tileY: the next stored
  // position is one column ahead of the current one.
  1054. if(tileY < pSC->WMISCP.cNumOfSliceMinus1H && (pSC->cColumn + 1) == pSC->WMISCP.uiTileY[tileY + 1])
  1055. bOneMBLeftVertTB = TRUE;
  // mbY is only updated after this block, so here it still holds the row
  // recorded by the previous call: the horizontal flag is re-evaluated only
  // when the row differs from that recorded row (or when cRow == 0).
  1056. if (pSC->cRow == 0) {
  1057. bHoriTileBoundary = FALSE;
  1058. tileX = 0;
  1059. }
  1060. else if(mbY != pSC->cRow && tileX < pSC->WMISCP.cNumOfSliceMinus1V && pSC->cRow == pSC->WMISCP.uiTileX[tileX + 1]) {
  1061. bHoriTileBoundary = TRUE;
  1062. tileX++;
  1063. }
  1064. else if(mbY != pSC->cRow)
  1065. bHoriTileBoundary = FALSE;
  1066. }
  1067. else {
  1068. bVertTileBoundary = FALSE;
  1069. bHoriTileBoundary = FALSE;
  1070. bOneMBLeftVertTB = FALSE;
  1071. bOneMBRightVertTB = FALSE;
  1072. }
  // Record the position only now; the row test above depends on the old mbY.
  1073. mbX = pSC->cColumn, mbY = pSC->cRow;
  1074. if(pSC->WMII.cPostProcStrength > 0){
  1075. // threshold for post processing
  1076. for(i = 0; i < iChannels; i ++){
  // LP quantizer iQP scaled by iStrength, doubled when olOverlap == OL_NONE.
  1077. qp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerLP[i][pSC->MBInfo.iQIndexLP].iQP * iStrength * (olOverlap == OL_NONE ? 2 : 1);
  1078. dcqp[i] = pSC->pTile[pSC->cTileColumn].pQuantizerDC[i][0].iQP * iStrength;
  1079. }
  1080. if(left) // a new MB row
  1081. slideOneMBRow(pSC->pPostProcInfo, pSC->m_param.cNumChannels, mbWidth, top, bottom); // previous current row becomes previous row
  1082. }
  1083. //================================================================
  1084. // 400_Y, 444_YUV
  1085. for (i = 0; i < iChannels && tScale < 16; ++i)
  1086. {
  1087. PixelI* const p0 = pSC->p0MBbuffer[i];
  1088. PixelI* const p1 = pSC->p1MBbuffer[i];
  1089. //================================
  1090. // second level inverse transform
  1091. if (!bottomORright)
  1092. {
  1093. if(pSC->WMII.cPostProcStrength > 0)
  1094. updatePostProcInfo(pSC->pPostProcInfo, p1, mbX, i); // update postproc info before IDCT
  1095. strIDCT4x4Stage2(p1);
  1096. if (pSC->m_param.bScaledArith) {
  1097. strNormalizeDec(p1, (i != 0));
  1098. }
  1099. }
  1100. //================================
  1101. // second level inverse overlap
  1102. if (OL_TWO == olOverlap)
  1103. {
  1104. /* Corner operations */
  1105. if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
  1106. strPost4_alternate(p1 + 0, p1 + 64, p1 + 0 + 16, p1 + 64 + 16);
  1107. if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
  1108. strPost4_alternate(p1 - 128, p1 - 64, p1 - 128 + 16, p1 - 64 + 16);
  1109. if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
  1110. strPost4_alternate(p0 + 32, p0 + 96, p0 + 32 + 16, p0 + 96 + 16);
  1111. if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
  1112. strPost4_alternate(p0 - 96, p0 - 32, p0 - 96 + 16, p0 - 32 + 16);
  // Vertical edge (image or tile) away from any horizontal edge: the
  // operators pair samples from p0 and p1. On a tile boundary both the
  // j = 0 and the j = -128 variants run.
  1113. if ((leftORright || bVertTileBoundary) && (!topORbottom && !bHoriTileBoundary))
  1114. {
  1115. if (left || bVertTileBoundary) {
  1116. j = 0;
  1117. strPost4_alternate(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
  1118. strPost4_alternate(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
  1119. }
  1120. if (right || bVertTileBoundary) {
  1121. j = -128;
  1122. strPost4_alternate(p0 + j + 32, p0 + j + 48, p1 + j + 0, p1 + j + 16);
  1123. strPost4_alternate(p0 + j + 96, p0 + j + 112, p1 + j + 64, p1 + j + 80);
  1124. }
  1125. }
  1126. if (!leftORright)
  1127. {
  1128. if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
  1129. {
  1130. if (top || bHoriTileBoundary) {
  1131. p = p1;
  1132. strPost4_alternate(p - 128, p - 64, p + 0, p + 64);
  1133. strPost4_alternate(p - 112, p - 48, p + 16, p + 80);
  1134. p = NULL;
  1135. }
  1136. if (bottom || bHoriTileBoundary) {
  1137. p = p0 + 32;
  1138. strPost4_alternate(p - 128, p - 64, p + 0, p + 64);
  1139. strPost4_alternate(p - 112, p - 48, p + 16, p + 80);
  1140. p = NULL;
  1141. }
  1142. }
  // No image edge and no tile boundary at this position: full 2-D split operator.
  1143. if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary)
  1144. strPost4x4Stage2Split_alternate(p0, p1);
  1145. }
  1146. }
  1147. if(pSC->WMII.cPostProcStrength > 0)
  1148. postProcMB(pSC->pPostProcInfo, p0, p1, mbX, i, dcqp[i]); // second stage deblocking
  1149. //================================
  1150. // first level inverse transform
  // Note: this continue also skips the postProcBlock() call at the end of the loop body.
  1151. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  1152. continue;
  1153. if (!top)
  1154. {
  1155. for (j = (left ? 32 : -96); j < (right ? 32 : 160); j += 64)
  1156. {
  1157. strIDCT4x4Stage1(p0 + j + 0);
  1158. strIDCT4x4Stage1(p0 + j + 16);
  1159. }
  1160. }
  1161. if (!bottom)
  1162. {
  1163. for (j = (left ? 0 : -128); j < (right ? 0 : 128); j += 64)
  1164. {
  1165. // if(tScale == 2 && bdBitDepth != BD_1){
  1166. // MIPgen(p1 + j + 0);
  1167. // MIPgen(p1 + j + 16);
  1168. // }
  1169. strIDCT4x4Stage1(p1 + j + 0);
  1170. strIDCT4x4Stage1(p1 + j + 16);
  1171. }
  1172. }
  1173. //================================
  1174. // first level inverse overlap
  1175. if (OL_NONE != olOverlap)
  1176. {
  1177. if (leftORright || bVertTileBoundary)
  1178. {
  1179. /* Corner operations */
  1180. if ((top || bHoriTileBoundary) && (left || bVertTileBoundary))
  1181. strPost4_alternate(p1 + 0, p1 + 1, p1 + 2, p1 + 3);
  1182. if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
  1183. strPost4_alternate(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
  1184. if ((bottom || bHoriTileBoundary) && (left || bVertTileBoundary))
  1185. strPost4_alternate(p0 + 48 + 10, p0 + 48 + 11, p0 + 48 + 8, p0 + 48 + 9);
  1186. if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
  1187. strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
  // The left-side (j = 10) and right-side (j = -64 + 14) blocks below
  // are identical apart from j; both run on a vertical tile boundary.
  1188. if (left || bVertTileBoundary) {
  1189. j = 0 + 10;
  1190. if (!top)
  1191. {
  1192. p = p0 + 16 + j;
  1193. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1194. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1195. strPost4_alternate(p + 16, p + 14, p + 22, p + 24);
  1196. strPost4_alternate(p + 17, p + 15, p + 23, p + 25);
  1197. p = NULL;
  1198. }
  1199. if (!bottom)
  1200. {
  1201. p = p1 + j;
  1202. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1203. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1204. p = NULL;
  1205. }
  1206. if (!topORbottom && !bHoriTileBoundary)
  1207. {
  1208. strPost4_alternate(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
  1209. strPost4_alternate(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
  1210. }
  1211. }
  1212. if (right || bVertTileBoundary) {
  1213. j = -64 + 14;
  1214. if (!top)
  1215. {
  1216. p = p0 + 16 + j;
  1217. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1218. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1219. strPost4_alternate(p + 16, p + 14, p + 22, p + 24);
  1220. strPost4_alternate(p + 17, p + 15, p + 23, p + 25);
  1221. p = NULL;
  1222. }
  1223. if (!bottom)
  1224. {
  1225. p = p1 + j;
  1226. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1227. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1228. p = NULL;
  1229. }
  1230. if (!topORbottom && !bHoriTileBoundary)
  1231. {
  1232. strPost4_alternate(p0 + 48 + j + 0, p0 + 48 + j - 2, p1 - 10 + j, p1 - 8 + j);
  1233. strPost4_alternate(p0 + 48 + j + 1, p0 + 48 + j - 1, p1 - 9 + j, p1 - 7 + j);
  1234. }
  1235. }
  1236. }
  // In the three loops below, j == -64 is the position whose strPost4
  // operands reach from the previous MB (p1 - 64 ..) into the current one
  // (p + 64 == p1 + 0 ..); it is skipped when bVertTileBoundary is set.
  1237. if (top || bHoriTileBoundary)
  1238. {
  1239. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  1240. {
  1241. if (!bVertTileBoundary || j != -64) {
  1242. p = p1 + j;
  1243. strPost4_alternate(p + 5, p + 4, p + 64, p + 65);
  1244. strPost4_alternate(p + 7, p + 6, p + 66, p + 67);
  1245. p = NULL;
  1246. strPost4x4Stage1_alternate(p1 + j, 0);
  1247. }
  1248. }
  1249. }
  1250. if (bottom || bHoriTileBoundary)
  1251. {
  1252. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  1253. {
  1254. if (!bVertTileBoundary || j != -64) {
  1255. strPost4x4Stage1_alternate(p0 + 16 + j, 0);
  1256. strPost4x4Stage1_alternate(p0 + 32 + j, 0);
  1257. p = p0 + 48 + j;
  1258. strPost4_alternate(p + 15, p + 14, p + 74, p + 75);
  1259. strPost4_alternate(p + 13, p + 12, p + 72, p + 73);
  1260. p = NULL;
  1261. }
  1262. }
  1263. }
  1264. if (!top && !bottom && !bHoriTileBoundary)
  1265. {
  1266. for (j = (left ? 0 : -192); j < (right ? -64 : 64); j += 64)
  1267. {
  1268. if (!bVertTileBoundary || j != -64) {
  1269. strPost4x4Stage1_alternate(p0 + 16 + j, 0);
  1270. strPost4x4Stage1_alternate(p0 + 32 + j, 0);
  1271. strPost4x4Stage1Split_alternate(p0 + 48 + j, p1 + j, 0);
  1272. strPost4x4Stage1_alternate(p1 + j, 0);
  1273. }
  1274. }
  1275. }
  1276. }
  1277. if(pSC->WMII.cPostProcStrength > 0 && (!topORleft))
  1278. postProcBlock(pSC->pPostProcInfo, p0, p1, mbX, i, qp[i]); // destairing and first stage deblocking
  1279. }
  1280. //================================================================
  1281. // 420_UV
  // Runs twice (planes 1 and 2 of the MB buffers) only for YUV_420.
  1282. for (i = 0; i < (YUV_420 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
  1283. {
  1284. PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
  1285. PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
  1286. //========================================
  1287. // second level inverse transform (420_UV)
  1288. if (!bottomORright)
  1289. {
  1290. if (!pSC->m_param.bScaledArith) {
  1291. strDCT2x2dn(p1, p1 + 32, p1 + 16, p1 + 48);
  1292. }
  1293. else {
  1294. strDCT2x2dnDec(p1, p1 + 32, p1 + 16, p1 + 48);
  1295. }
  1296. }
  1297. //========================================
  1298. // second level inverse overlap (420_UV)
  1299. if (OL_TWO == olOverlap)
  1300. {
  // COMPUTE_CORNER_PRED_DIFF / COMPUTE_CORNER_PRED_ADD are defined outside
  // this view. The DIFF group here and the ADD group after the overlap
  // operators use the same conditions and the same sample offsets;
  // iPredBefore / iPredAfter carry a sample value captured at the
  // right-adjacent position into the later right/boundary position.
  1301. if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
  1302. COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 0, *(p1 - 64 + 32));
  1303. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
  1304. iPredBefore[i][0] = *(p1 + 0);
  1305. if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
  1306. COMPUTE_CORNER_PRED_DIFF(p1 - 64 + 32, iPredBefore[i][0]);
  1307. if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
  1308. COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 16, *(p0 - 64 + 48));
  1309. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
  1310. iPredBefore[i][1] = *(p0 + 16);
  1311. if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
  1312. COMPUTE_CORNER_PRED_DIFF(p0 - 64 + 48, iPredBefore[i][1]);
  1313. if ((leftORright || bVertTileBoundary) && !topORbottom && !bHoriTileBoundary)
  1314. {
  1315. if (left || bVertTileBoundary)
  1316. strPost2_alternate(p0 + 0 + 16, p1 + 0);
  1317. if (right || bVertTileBoundary)
  1318. strPost2_alternate(p0 + -32 + 16, p1 + -32);
  1319. }
  1320. if (!leftORright)
  1321. {
  1322. if ((topORbottom || bHoriTileBoundary) && !bVertTileBoundary)
  1323. {
  1324. if (top || bHoriTileBoundary)
  1325. strPost2_alternate(p1 - 32, p1);
  1326. if (bottom || bHoriTileBoundary)
  1327. strPost2_alternate(p0 + 16 - 32, p0 + 16);
  1328. }
  1329. else if (!topORbottom && !bHoriTileBoundary && !bVertTileBoundary) {
  1330. strPost2x2_alternate(p0 - 16, p0 + 16, p1 - 32, p1);
  1331. }
  1332. }
  1333. if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
  1334. COMPUTE_CORNER_PRED_ADD(p1 - 64 + 0, *(p1 - 64 + 32));
  1335. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
  1336. iPredAfter[i][0] = *(p1 + 0);
  1337. if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
  1338. COMPUTE_CORNER_PRED_ADD(p1 - 64 + 32, iPredAfter[i][0]);
  1339. if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
  1340. COMPUTE_CORNER_PRED_ADD(p0 - 64 + 16, *(p0 - 64 + 48));
  1341. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
  1342. iPredAfter[i][1] = *(p0 + 16);
  1343. if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
  1344. COMPUTE_CORNER_PRED_ADD(p0 - 64 + 48, iPredAfter[i][1]);
  1345. }
  1346. //========================================
  1347. // first level inverse transform (420_UV)
  1348. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  1349. continue;
  1350. if (!top)
  1351. {
  1352. // In order to allow correction operation of corner chroma overlap operators (fixed)
  1353. // processing of left most MB column must be delayed by one MB
  1354. // Thus left MB not processed until leftAdjacentColumn = 1
  // With left set, the start (48) is not below the end bound, so the loop body does not run.
  1355. for (j = ((left) ? 48 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -48 : -16)); j < ((right || bVertTileBoundary) ? 16 : 48); j += 32)
  1356. {
  1357. strIDCT4x4Stage1(p0 + j);
  1358. }
  1359. }
  1360. if (!bottom)
  1361. {
  1362. // In order to allow correction operation of corner chroma overlap operators (fixed)
  1363. // processing of left most MB column must be delayed by one MB
  1364. // Thus left MB not processed until leftAdjacentColumn = 1
  // With left set, the start (32) is not below the end bound, so the loop body does not run.
  1365. for (j = ((left) ? 32 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -64 : -32)); j < ((right || bVertTileBoundary) ? 0 : 32); j += 32)
  1366. {
  1367. strIDCT4x4Stage1(p1 + j);
  1368. }
  1369. }
  1370. //========================================
  1371. // first level inverse overlap (420_UV)
  1372. if (OL_NONE != olOverlap)
  1373. {
  1374. /* Corner operations */
  1375. /* Change because the top-left corner ICT will not have happened until leftAdjacentColumn ==1 */
  1376. if ((top || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
  1377. strPost4_alternate(p1 - 64 + 0, p1 - 64 + 1, p1 - 64 + 2, p1 - 64 + 3);
  1378. if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
  1379. strPost4_alternate(p1 - 27, p1 - 28, p1 - 25, p1 - 26);
  1380. /* Change because the bottom-left corner ICT will not have happened until leftAdjacentColumn ==1 */
  1381. if ((bottom || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
  1382. strPost4_alternate(p0 - 64 + 16 + 10, p0 - 64 + 16 + 11, p0 - 64 + 16 + 8, p0 - 64 + 16 + 9);
  1383. if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
  1384. strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
  1385. if(!left && !top)
  1386. {
  1387. /* Change because the vertical 1-D overlap operations of the left edge pixels cannot be performed until leftAdjacentColumn ==1 */
  1388. if (leftAdjacentColumn || bOneMBRightVertTB)
  1389. {
  1390. if (!bottom && !bHoriTileBoundary)
  1391. {
  1392. strPost4_alternate(p0 - 64 + 26, p0 - 64 + 24, p1 - 64 + 0, p1 - 64 + 2);
  1393. strPost4_alternate(p0 - 64 + 27, p0 - 64 + 25, p1 - 64 + 1, p1 - 64 + 3);
  1394. }
  1395. strPost4_alternate(p0 - 64 + 10, p0 - 64 + 8, p0 - 64 + 16, p0 - 64 + 18);
  1396. strPost4_alternate(p0 - 64 + 11, p0 - 64 + 9, p0 - 64 + 17, p0 - 64 + 19);
  1397. }
  // Horizontal edge (image bottom or tile boundary): 1-D operators only;
  // otherwise the split 2-D operator spans p0 and p1.
  1398. if (bottom || bHoriTileBoundary)
  1399. {
  1400. p = p0 + -48;
  1401. strPost4_alternate(p + 15, p + 14, p + 42, p + 43);
  1402. strPost4_alternate(p + 13, p + 12, p + 40, p + 41);
  1403. p = NULL;
  1404. if (!right && !bVertTileBoundary)
  1405. {
  1406. p = p0 + -16;
  1407. strPost4_alternate(p + 15, p + 14, p + 42, p + 43);
  1408. strPost4_alternate(p + 13, p + 12, p + 40, p + 41);
  1409. p = NULL;
  1410. }
  1411. }
  1412. else
  1413. {
  1414. strPost4x4Stage1Split_alternate(p0 + -48, p1 - 16 + -48, 32);
  1415. if (!right && !bVertTileBoundary)
  1416. strPost4x4Stage1Split_alternate(p0 + -16, p1 - 16 + -16, 32);
  1417. }
  1418. if (right || bVertTileBoundary)
  1419. {
  1420. if (!bottom && !bHoriTileBoundary)
  1421. {
  1422. strPost4_alternate(p0 - 2 , p0 - 4 , p1 - 28, p1 - 26);
  1423. strPost4_alternate(p0 - 1 , p0 - 3 , p1 - 27, p1 - 25);
  1424. }
  1425. strPost4_alternate(p0 - 18, p0 - 20, p0 - 12, p0 - 10);
  1426. strPost4_alternate(p0 - 17, p0 - 19, p0 - 11, p0 - 9);
  1427. }
  1428. else
  1429. {
  1430. strPost4x4Stage1_alternate(p0 - 32, 32);
  1431. }
  1432. strPost4x4Stage1_alternate(p0 - 64, 32);
  1433. }
  1434. if (top || bHoriTileBoundary)
  1435. {
  1436. if (!left)
  1437. {
  1438. p = p1 + -64 + 4;
  1439. strPost4_alternate(p + 1, p + 0, p + 28, p + 29);
  1440. strPost4_alternate(p + 3, p + 2, p + 30, p + 31);
  1441. p = NULL;
  1442. }
  1443. if (!left && !right && !bVertTileBoundary)
  1444. {
  1445. p = p1 + -32 + 4;
  1446. strPost4_alternate(p + 1, p + 0, p + 28, p + 29);
  1447. strPost4_alternate(p + 3, p + 2, p + 30, p + 31);
  1448. p = NULL;
  1449. }
  1450. }
  1451. }
  1452. }
  1453. //================================================================
  1454. // 422_UV
  // Runs twice (planes 1 and 2 of the MB buffers) only for YUV_422.
  1455. for (i = 0; i < (YUV_422 == cfColorFormat? 2U : 0U) && tScale < 16; ++i)
  1456. {
  1457. PixelI* const p0 = pSC->p0MBbuffer[1 + i];//(0 == i ? pSC->pU0 : pSC->pV0);
  1458. PixelI* const p1 = pSC->p1MBbuffer[1 + i];//(0 == i ? pSC->pU1 : pSC->pV1);
  1459. //========================================
  1460. // second level inverse transform (422_UV)
  // The cThumbnailScale < 16 test repeats the loop condition: tScale was
  // initialized from the same field at the top of the function.
  1461. if ((!bottomORright) && pSC->m_Dparam->cThumbnailScale < 16)
  1462. {
  1463. // 1D lossless HT
  // Order matters: the second statement uses the already updated p1[0].
  1464. p1[0] -= ((p1[32] + 1) >> 1);
  1465. p1[32] += p1[0];
  1466. if (!pSC->m_param.bScaledArith) {
  1467. strDCT2x2dn(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  1468. strDCT2x2dn(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
  1469. }
  1470. else {
  1471. strDCT2x2dnDec(p1 + 0, p1 + 64, p1 + 16, p1 + 80);
  1472. strDCT2x2dnDec(p1 + 32, p1 + 96, p1 + 48, p1 + 112);
  1473. }
  1474. }
  1475. //========================================
  1476. // second level inverse overlap (422_UV)
  1477. if (OL_TWO == olOverlap)
  1478. {
  // Same DIFF-before / ADD-after bracketing as in the 420_UV loop, with
  // 422-specific offsets (-128 / +64 / +48 / +112).
  1479. if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
  1480. COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 0, *(p1 - 128 + 64));
  1481. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
  1482. iPredBefore[i][0] = *(p1 + 0);
  1483. if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
  1484. COMPUTE_CORNER_PRED_DIFF(p1 - 128 + 64, iPredBefore[i][0]);
  1485. if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
  1486. COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 48, *(p0 - 128 + 112));
  1487. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
  1488. iPredBefore[i][1] = *(p0 + 48);
  1489. if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
  1490. COMPUTE_CORNER_PRED_DIFF(p0 - 128 + 112, iPredBefore[i][1]);
  1491. if (!bottom)
  1492. {
  1493. if (leftORright || bVertTileBoundary)
  1494. {
  1495. if (!top && !bHoriTileBoundary)
  1496. {
  1497. if (left || bVertTileBoundary)
  1498. strPost2_alternate(p0 + 48 + 0, p1 + 0);
  1499. if (right || bVertTileBoundary)
  1500. strPost2_alternate(p0 + 48 + -64, p1 + -64);
  1501. }
  1502. if (left || bVertTileBoundary)
  1503. strPost2_alternate(p1 + 16, p1 + 16 + 16);
  1504. if (right || bVertTileBoundary)
  1505. strPost2_alternate(p1 + -48, p1 + -48 + 16);
  1506. }
  1507. if (!leftORright && !bVertTileBoundary)
  1508. {
  1509. if (top || bHoriTileBoundary)
  1510. strPost2_alternate(p1 - 64, p1);
  1511. else
  1512. strPost2x2_alternate(p0 - 16, p0 + 48, p1 - 64, p1);
  1513. strPost2x2_alternate(p1 - 48, p1 + 16, p1 - 32, p1 + 32);
  1514. }
  1515. }
  1516. if ((bottom || bHoriTileBoundary) && (!leftORright && !bVertTileBoundary))
  1517. strPost2_alternate(p0 - 16, p0 + 48);
  1518. if ((leftAdjacentColumn || bOneMBRightVertTB) && (top || bHoriTileBoundary))
  1519. COMPUTE_CORNER_PRED_ADD(p1 - 128 + 0, *(p1 - 128 + 64));
  1520. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (top || bHoriTileBoundary))
  1521. iPredAfter[i][0] = *(p1 + 0);
  1522. if ((right || bVertTileBoundary) && (top || bHoriTileBoundary))
  1523. COMPUTE_CORNER_PRED_ADD(p1 - 128 + 64, iPredAfter[i][0]);
  1524. if ((leftAdjacentColumn || bOneMBRightVertTB) && (bottom || bHoriTileBoundary))
  1525. COMPUTE_CORNER_PRED_ADD(p0 - 128 + 48, *(p0 - 128 + 112));
  1526. if ((rightAdjacentColumn || bOneMBLeftVertTB) && (bottom || bHoriTileBoundary))
  1527. iPredAfter[i][1] = *(p0 + 48);
  1528. if ((right || bVertTileBoundary) && (bottom || bHoriTileBoundary))
  1529. COMPUTE_CORNER_PRED_ADD(p0 - 128 + 112, iPredAfter[i][1]);
  1530. }
  1531. //========================================
  1532. // first level inverse transform (422_UV)
  1533. if(tScale >= 4) // bypass first level transform for 4:1 and smaller thumbnail
  1534. continue;
  1535. if (!top)
  1536. {
  1537. // Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
  1538. // Since 422 has no vertical downsampling, no top MB delay of processing is necessary
  // With left set, the start (112) is not below the end bound, so the loop body does not run.
  1539. for (j = (left ? 112 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -80 : -16)); j < ((right || bVertTileBoundary) ? 48 : 112); j += 64)
  1540. {
  1541. strIDCT4x4Stage1(p0 + j);
  1542. }
  1543. }
  1544. if (!bottom)
  1545. {
  1546. // Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
  1547. // Since 422 has no vertical downsampling, no top MB delay of processing is necessary
  // With left set, the start (64) is not below the end bound, so the loop body does not run.
  1548. for (j = (left ? 64 : ((leftAdjacentColumn || bOneMBRightVertTB) ? -128 : -64)); j < ((right || bVertTileBoundary) ? 0 : 64); j += 64)
  1549. {
  1550. strIDCT4x4Stage1(p1 + j + 0);
  1551. strIDCT4x4Stage1(p1 + j + 16);
  1552. strIDCT4x4Stage1(p1 + j + 32);
  1553. }
  1554. }
  1555. //========================================
  1556. // first level inverse overlap (422_UV)
  1557. if (OL_NONE != olOverlap)
  1558. {
  1559. /* Corner operations */
  1560. if ((top || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
  1561. strPost4_alternate(p1 - 128 + 0, p1 - 128 + 1, p1 - 128 + 2, p1 - 128 + 3);
  1562. if ((top || bHoriTileBoundary) && (right || bVertTileBoundary))
  1563. strPost4_alternate(p1 - 59, p1 - 60, p1 - 57, p1 - 58);
  1564. if ((bottom || bHoriTileBoundary) && (leftAdjacentColumn || bOneMBRightVertTB))
  1565. strPost4_alternate(p0 - 128 + 48 + 10, p0 - 128 + 48 + 11, p0 - 128 + 48 + 8, p0 - 128 + 48 + 9);
  1566. if ((bottom || bHoriTileBoundary) && (right || bVertTileBoundary))
  1567. strPost4_alternate(p0 - 1, p0 - 2, p0 - 3, p0 - 4);
  1568. if (!top)
  1569. {
  1570. // Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
  1571. if (leftAdjacentColumn || bOneMBRightVertTB) {
  1572. p = p0 + 32 + 10 - 128;
  1573. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1574. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1575. p = NULL;
  1576. }
  1577. if (right || bVertTileBoundary) {
  1578. p = p0 + -32 + 14;
  1579. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1580. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1581. p = NULL;
  1582. }
  // Runs for j = -128 only, and only when neither left, right nor bVertTileBoundary holds.
  1583. for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
  1584. strPost4x4Stage1_alternate(p0 + j + 32, 0);
  1585. }
  1586. if (!bottom)
  1587. {
  1588. // Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
  1589. if (leftAdjacentColumn || bOneMBRightVertTB)
  1590. {
  1591. p = p1 + 0 + 10 - 128;
  1592. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1593. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1594. p += 16;
  1595. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1596. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1597. p = NULL;
  1598. }
  1599. if (right || bVertTileBoundary)
  1600. {
  1601. p = p1 + -64 + 14;
  1602. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1603. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1604. p += 16;
  1605. strPost4_alternate(p + 0, p - 2, p + 6, p + 8);
  1606. strPost4_alternate(p + 1, p - 1, p + 7, p + 9);
  1607. p = NULL;
  1608. }
  1609. for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
  1610. {
  1611. strPost4x4Stage1_alternate(p1 + j + 0, 0);
  1612. strPost4x4Stage1_alternate(p1 + j + 16, 0);
  1613. }
  1614. }
  // On a horizontal tile boundary that is neither image top nor bottom,
  // both the top-style (p1 + 5) and bottom-style (p0 + 48 + 13) passes run.
  1615. if (topORbottom || bHoriTileBoundary)
  1616. {
  1617. if (top || bHoriTileBoundary) {
  1618. p = p1 + 5;
  1619. for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
  1620. {
  1621. strPost4_alternate(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
  1622. strPost4_alternate(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
  1623. }
  1624. p = NULL;
  1625. }
  1626. if (bottom || bHoriTileBoundary) {
  1627. p = p0 + 48 + 13;
  1628. for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
  1629. {
  1630. strPost4_alternate(p + j + 0, p + j - 1, p + j + 59, p + j + 60);
  1631. strPost4_alternate(p + j + 2, p + j + 1, p + j + 61, p + j + 62);
  1632. }
  1633. p = NULL;
  1634. }
  1635. }
  1636. else
  1637. {
  1638. // Need to delay processing of left column until leftAdjacentColumn = 1 for corner overlap operators
  1639. if (leftAdjacentColumn || bOneMBRightVertTB)
  1640. {
  1641. j = 0 + 0 - 128;
  1642. strPost4_alternate(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
  1643. strPost4_alternate(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
  1644. }
  1645. if (right || bVertTileBoundary)
  1646. {
  1647. j = -64 + 4;
  1648. strPost4_alternate(p0 + j + 48 + 10 + 0, p0 + j + 48 + 10 - 2, p1 + j + 0, p1 + j + 2);
  1649. strPost4_alternate(p0 + j + 48 + 10 + 1, p0 + j + 48 + 10 - 1, p1 + j + 1, p1 + j + 3);
  1650. }
  1651. for (j = (left ? 0 : -128); j < ((right || bVertTileBoundary) ? -64 : 0); j += 64)
  1652. strPost4x4Stage1Split_alternate(p0 + j + 48, p1 + j + 0, 0);
  1653. }
  1654. }
  1655. }
  1656. return ICERR_OK;
  1657. }