vq_tm.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. /* Copyright (C) 2007 Hong Zhiqian */
  2. /**
  3. @file vq_tm.h
  4. @author Hong Zhiqian
  5. @brief Various compatibility routines for Speex (TriMedia version)
  6. */
  7. /*
  8. Redistribution and use in source and binary forms, with or without
  9. modification, are permitted provided that the following conditions
  10. are met:
  11. - Redistributions of source code must retain the above copyright
  12. notice, this list of conditions and the following disclaimer.
  13. - Redistributions in binary form must reproduce the above copyright
  14. notice, this list of conditions and the following disclaimer in the
  15. documentation and/or other materials provided with the distribution.
  16. - Neither the name of the Xiph.org Foundation nor the names of its
  17. contributors may be used to endorse or promote products derived from
  18. this software without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
  23. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  26. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  27. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. #include <ops/custom_defs.h>
  32. #include "profile_tm.h"
  33. #ifdef FIXED_POINT
  34. inline void vq_nbest_dist(int i, int N, int dist, int *used, int *nbest, Int32 *best_dist)
  35. {
  36. register int k;
  37. if (i<N || dist<best_dist[N-1])
  38. {
  39. for (k=N-1; (k >= 1) && (k > *used || dist < best_dist[k-1]); k--)
  40. { best_dist[k]=best_dist[k-1];
  41. nbest[k] = nbest[k-1];
  42. }
  43. best_dist[k]=dist;
  44. nbest[k]=i;
  45. *used++;
  46. }
  47. }
  48. void vq_nbest_5(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  49. {
  50. register int i, j;
  51. register int in10, in32, in4;
  52. int used = 0;
  53. in10 = pack16lsb(in[1],in[0]); /* Note: memory is not align here */
  54. in32 = pack16lsb(in[3],in[2]);
  55. in4 = sex16(in[4]);
  56. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  57. #pragma TCS_unroll=2
  58. #pragma TCS_unrollexact=1
  59. #endif
  60. for ( i=0,j=0 ; i<entries ; i+=2,j+=5 )
  61. {
  62. register int dist1, dist2;
  63. register int cb10, cb32, cb54, cb76, cb98, cb87, cb65;
  64. cb10 = ld32x(codebook,j);
  65. cb32 = ld32x(codebook,j+1);
  66. cb54 = ld32x(codebook,j+2);
  67. cb76 = ld32x(codebook,j+3);
  68. cb98 = ld32x(codebook,j+4);
  69. dist1 = sex16(cb54) * in4;
  70. dist1 += ifir16(in10,cb10) + ifir16(in32,cb32);
  71. dist1 = (E[i] >> 1) - dist1;
  72. cb65 = funshift2(cb76,cb54);
  73. cb87 = funshift2(cb98,cb76);
  74. dist2 = asri(16,cb98) * in4;
  75. dist2 += ifir16(in10,cb65) + ifir16(in32,cb87);
  76. dist2 = (E[i+1] >> 1) - dist2;
  77. vq_nbest_dist(i,N,dist1,&used,nbest,best_dist);
  78. vq_nbest_dist(i+1,N,dist2,&used,nbest,best_dist);
  79. }
  80. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  81. #pragma TCS_unrollexact=0
  82. #pragma TCS_unroll=0
  83. #endif
  84. }
  85. void vq_nbest_8(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  86. {
  87. register int i, j;
  88. register int in10, in32, in54, in76;
  89. int used = 0;
  90. in10 = pack16lsb(in[1],in[0]); /* Note: memory is not align here */
  91. in32 = pack16lsb(in[3],in[2]);
  92. in54 = pack16lsb(in[5],in[4]);
  93. in76 = pack16lsb(in[7],in[6]);
  94. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  95. #pragma TCS_unroll=4
  96. #pragma TCS_unrollexact=1
  97. #endif
  98. for ( i=0,j=0 ; i<entries ; ++i,j+=4 )
  99. {
  100. register int dist;
  101. register int cb10, cb32, cb54, cb76;
  102. cb10 = ld32x(codebook,j);
  103. cb32 = ld32x(codebook,j+1);
  104. cb54 = ld32x(codebook,j+2);
  105. cb76 = ld32x(codebook,j+3);
  106. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  107. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  108. dist = (E[i] >> 1) - dist;
  109. vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
  110. }
  111. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  112. #pragma TCS_unrollexact=0
  113. #pragma TCS_unroll=0
  114. #endif
  115. }
  116. void vq_nbest_10(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  117. {
  118. register int i, j;
  119. register int in10, in32, in54, in76, in98;
  120. int used = 0;
  121. in10 = pack16lsb(in[1],in[0]);
  122. in32 = pack16lsb(in[3],in[2]);
  123. in54 = pack16lsb(in[5],in[4]);
  124. in76 = pack16lsb(in[7],in[6]);
  125. in98 = pack16lsb(in[9],in[8]);
  126. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  127. #pragma TCS_unroll=4
  128. #pragma TCS_unrollexact=1
  129. #endif
  130. for ( i=0,j=0 ; i<entries ; ++i,j+=5 )
  131. {
  132. register int dist;
  133. register int cb10, cb32, cb54, cb76, cb98;
  134. cb10 = ld32x(codebook,j);
  135. cb32 = ld32x(codebook,j+1);
  136. cb54 = ld32x(codebook,j+2);
  137. cb76 = ld32x(codebook,j+3);
  138. cb98 = ld32x(codebook,j+4);
  139. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  140. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  141. dist += ifir16(in98,cb98);
  142. dist = (E[i] >> 1) - dist;
  143. vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
  144. }
  145. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  146. #pragma TCS_unrollexact=0
  147. #pragma TCS_unroll=0
  148. #endif
  149. }
  150. void vq_nbest_20(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  151. {
  152. register int i, j;
  153. register int in10, in32, in54, in76, in98, in_10, in_32, in_54, in_76, in_98;
  154. int used = 0;
  155. in10 = pack16lsb(in[1],in[0]); /* Note: memory is not align here */
  156. in32 = pack16lsb(in[3],in[2]);
  157. in54 = pack16lsb(in[5],in[4]);
  158. in76 = pack16lsb(in[6],in[6]);
  159. in98 = pack16lsb(in[9],in[8]);
  160. in_10 = pack16lsb(in[11],in[10]);
  161. in_32 = pack16lsb(in[13],in[12]);
  162. in_54 = pack16lsb(in[15],in[14]);
  163. in_76 = pack16lsb(in[17],in[16]);
  164. in_98 = pack16lsb(in[19],in[18]);
  165. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  166. #pragma TCS_unroll=2
  167. #pragma TCS_unrollexact=1
  168. #endif
  169. for ( i=0,j=0 ; i<entries ; ++i,j+=10 )
  170. {
  171. register int dist;
  172. register int cb10, cb32, cb54, cb76, cb98, cb_10, cb_32, cb_54, cb_76, cb_98;
  173. cb10 = ld32x(codebook,j);
  174. cb32 = ld32x(codebook,j+1);
  175. cb54 = ld32x(codebook,j+2);
  176. cb76 = ld32x(codebook,j+3);
  177. cb98 = ld32x(codebook,j+4);
  178. cb_10 = ld32x(codebook,j+5);
  179. cb_32 = ld32x(codebook,j+6);
  180. cb_54 = ld32x(codebook,j+7);
  181. cb_76 = ld32x(codebook,j+8);
  182. cb_98 = ld32x(codebook,j+9);
  183. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  184. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  185. dist += ifir16(in98,cb98) + ifir16(in_10,cb_10);
  186. dist += ifir16(in_32,cb_32) + ifir16(in_54,cb_54);
  187. dist += ifir16(in_76,cb_76) + ifir16(in_98,cb_98);
  188. dist = (E[i] >> 1) - dist;
  189. vq_nbest_dist(i,N,dist,&used,nbest,best_dist);
  190. }
  191. #if (TM_UNROLL && TM_UNROLL_VQNBEST > 0)
  192. #pragma TCS_unrollexact=0
  193. #pragma TCS_unroll=0
  194. #endif
  195. }
  196. #define OVERRIDE_VQ_NBEST
  197. void vq_nbest (Int16 *in, const Int16 *codebook, int len, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist, char *stack)
  198. {
  199. TMDEBUG_ALIGNMEM(codebook);
  200. VQNBEST_START();
  201. if( len==5 )
  202. vq_nbest_5(in,codebook,entries,E,N,nbest,best_dist);
  203. else if ( len==8 )
  204. vq_nbest_8(in,codebook,entries,E,N,nbest,best_dist);
  205. else if ( len==10 )
  206. vq_nbest_10(in,codebook,entries,E,N,nbest,best_dist);
  207. else if ( len==20 )
  208. vq_nbest_20(in,codebook,entries,E,N,nbest,best_dist);
  209. #ifndef REMARK_ON
  210. (void)stack;
  211. #endif
  212. VQNBEST_STOP();
  213. }
  214. inline void vq_nbest_sign_dist(int i, int N, int dist, int sign, int entries, int *used, int *nbest, Int32 *best_dist)
  215. {
  216. register int k;
  217. if (i<N || dist<best_dist[N-1])
  218. { for (k=N-1; (k >= 1) && (k > *used || dist < best_dist[k-1]); k--)
  219. {
  220. best_dist[k]=best_dist[k-1];
  221. nbest[k] = nbest[k-1];
  222. }
  223. if ( sign ) i += entries;
  224. best_dist[k]=dist;
  225. *used++;
  226. nbest[k] = i;
  227. }
  228. }
  229. void vq_nbest_sign_5(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  230. {
  231. register int i, j;
  232. register int in10, in32, in4;
  233. int used = 0;
  234. in10 = ld32(in);
  235. in32 = ld32x(in,1);
  236. in4 = sex16(in[4]);
  237. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  238. #pragma TCS_unroll=2
  239. #pragma TCS_unrollexact=1
  240. #endif
  241. for ( i=0,j=0 ; i<entries ; i+=2,j+=5 )
  242. {
  243. register int dist1, dist2, sign1, sign2;
  244. register int cb10, cb32, cb54, cb76, cb98, cb87, cb65;
  245. cb10 = ld32x(codebook,j);
  246. cb32 = ld32x(codebook,j+1);
  247. cb54 = ld32x(codebook,j+2);
  248. cb76 = ld32x(codebook,j+3);
  249. cb98 = ld32x(codebook,j+4);
  250. dist1 = sex16(cb54) * in4;
  251. dist1 += ifir16(in10,cb10) + ifir16(in32,cb32);
  252. sign1 = mux(dist1>0,0,1);
  253. dist1 = iflip(dist1>0,dist1);
  254. dist1 = (E[i] >> 1) + dist1;
  255. cb65 = funshift2(cb76,cb54);
  256. cb87 = funshift2(cb98,cb76);
  257. dist2 = asri(16,cb98) * in4;
  258. dist2 += ifir16(in10,cb65) + ifir16(in32,cb87);
  259. sign2 = mux(dist2>0,0,1);
  260. dist2 = iflip(dist2>0,dist2);
  261. dist2 = (E[i] >> 1) + dist2;
  262. vq_nbest_sign_dist(i,N,dist1,sign1,entries,&used,nbest,best_dist);
  263. vq_nbest_sign_dist(i+1,N,dist2,sign2,entries,&used,nbest,best_dist);
  264. }
  265. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  266. #pragma TCS_unrollexact=0
  267. #pragma TCS_unroll=0
  268. #endif
  269. }
  270. void vq_nbest_sign_8(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  271. {
  272. register int i, j;
  273. register int sign;
  274. register int in10, in32, in54, in76;
  275. int used = 0;
  276. in10 = ld32(in);
  277. in32 = ld32x(in,1);
  278. in54 = ld32x(in,2);
  279. in76 = ld32x(in,3);
  280. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  281. #pragma TCS_unroll=4
  282. #pragma TCS_unrollexact=1
  283. #endif
  284. for ( i=0,j=0 ; i<entries ; ++i,j+=4 )
  285. {
  286. register int dist;
  287. register int cb10, cb32, cb54, cb76;
  288. cb10 = ld32x(codebook,j);
  289. cb32 = ld32x(codebook,j+1);
  290. cb54 = ld32x(codebook,j+2);
  291. cb76 = ld32x(codebook,j+3);
  292. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  293. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  294. sign = mux(dist>0,0,1);
  295. dist = iflip(dist>0,dist);
  296. dist = (E[i] >> 1) + dist;
  297. vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
  298. }
  299. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  300. #pragma TCS_unrollexact=0
  301. #pragma TCS_unroll=0
  302. #endif
  303. }
  304. void vq_nbest_sign_10(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  305. {
  306. register int i, j;
  307. register int sign;
  308. register int in10, in32, in54, in76, in98;
  309. int used = 0;
  310. in10 = ld32(in);
  311. in32 = ld32x(in,1);
  312. in54 = ld32x(in,2);
  313. in76 = ld32x(in,3);
  314. in98 = ld32x(in,4);
  315. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  316. #pragma TCS_unroll=4
  317. #pragma TCS_unrollexact=1
  318. #endif
  319. for ( i=0,j=0 ; i<entries ; ++i,j+=5 )
  320. {
  321. register int dist;
  322. register int cb10, cb32, cb54, cb76, cb98;
  323. cb10 = ld32x(codebook,j);
  324. cb32 = ld32x(codebook,j+1);
  325. cb54 = ld32x(codebook,j+2);
  326. cb76 = ld32x(codebook,j+3);
  327. cb98 = ld32x(codebook,j+4);
  328. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  329. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  330. dist += ifir16(in98,cb98);
  331. sign = mux(dist>0,0,1);
  332. dist = iflip(dist>0,dist);
  333. dist = (E[i] >> 1) + dist;
  334. vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
  335. }
  336. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  337. #pragma TCS_unrollexact=0
  338. #pragma TCS_unroll=0
  339. #endif
  340. }
  341. void vq_nbest_sign_20(Int16 *in, const Int16 *codebook, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist)
  342. {
  343. register int i, j;
  344. register int sign;
  345. register int in10, in32, in54, in76, in98, in_10, in_32, in_54, in_76, in_98;
  346. int used = 0;
  347. in10 = ld32(in);
  348. in32 = ld32x(in,1);
  349. in54 = ld32x(in,2);
  350. in76 = ld32x(in,3);
  351. in98 = ld32x(in,4);
  352. in_10 = ld32x(in,5);
  353. in_32 = ld32x(in,6);
  354. in_54 = ld32x(in,7);
  355. in_76 = ld32x(in,8);
  356. in_98 = ld32x(in,9);
  357. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  358. #pragma TCS_unroll=2
  359. #pragma TCS_unrollexact=1
  360. #endif
  361. for ( i=0,j=0 ; i<entries ; ++i,j+=10 )
  362. {
  363. register int dist;
  364. register int cb10, cb32, cb54, cb76, cb98, cb_10, cb_32, cb_54, cb_76, cb_98;
  365. cb10 = ld32x(codebook,j);
  366. cb32 = ld32x(codebook,j+1);
  367. cb54 = ld32x(codebook,j+2);
  368. cb76 = ld32x(codebook,j+3);
  369. cb98 = ld32x(codebook,j+4);
  370. cb_10 = ld32x(codebook,j+5);
  371. cb_32 = ld32x(codebook,j+6);
  372. cb_54 = ld32x(codebook,j+7);
  373. cb_76 = ld32x(codebook,j+8);
  374. cb_98 = ld32x(codebook,j+9);
  375. dist = ifir16(in10,cb10) + ifir16(in32,cb32);
  376. dist += ifir16(in54,cb54) + ifir16(in76,cb76);
  377. dist += ifir16(in98,cb98) + ifir16(in_10,cb_10);
  378. dist += ifir16(in_32,cb_32) + ifir16(in_54,cb_54);
  379. dist += ifir16(in_76,cb_76) + ifir16(in_98,cb_98);
  380. sign = mux(dist>0,0,1);
  381. dist = iflip(dist>0,dist);
  382. dist = (E[i] >> 1) + dist;
  383. vq_nbest_sign_dist(i,N,dist,sign,entries,&used,nbest,best_dist);
  384. }
  385. #if (TM_UNROLL && TM_UNROLL_VQSIGNNBEST > 0)
  386. #pragma TCS_unrollexact=0
  387. #pragma TCS_unroll=0
  388. #endif
  389. }
  390. #define OVERRIDE_VQ_NBEST_SIGN
  391. void vq_nbest_sign (Int16 *in, const Int16 *codebook, int len, int entries, Int32 *E, int N, int *nbest, Int32 *best_dist, char *stack)
  392. {
  393. TMDEBUG_ALIGNMEM(in);
  394. TMDEBUG_ALIGNMEM(codebook);
  395. VQNBESTSIGN_START();
  396. if( len==5 )
  397. vq_nbest_sign_5(in,codebook,entries,E,N,nbest,best_dist);
  398. else if ( len==8 )
  399. vq_nbest_sign_8(in,codebook,entries,E,N,nbest,best_dist);
  400. else if ( len==10 )
  401. vq_nbest_sign_10(in,codebook,entries,E,N,nbest,best_dist);
  402. else if ( len==20 )
  403. vq_nbest_sign_20(in,codebook,entries,E,N,nbest,best_dist);
  404. #ifndef REMARK_ON
  405. (void)stack;
  406. #endif
  407. VQNBESTSIGN_STOP();
  408. }
  409. #endif