123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479 |
- /* Copyright (C) 2007 Hong Zhiqian */
- /**
- @file ltp_tm.h
- @author Hong Zhiqian
- @brief Various compatibility routines for Speex (TriMedia version)
- */
- /*
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- - Neither the name of the Xiph.org Foundation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- #include <ops/custom_defs.h>
- #include "profile_tm.h"
- #ifdef FIXED_POINT
- #define OVERRIDE_INNER_PROD
- Int32 inner_prod(const Int16 * restrict x, const Int16 * restrict y, int len)
- {
- register int sum = 0;
- INNERPROD_START();
- if ( (int)x & 0x03 == 0 && (int)y & 0x03 == 0 )
- {
- register int i;
- len >>= 1;
- for ( i=0 ; i<len ; i+=4 )
- {
- register int x0, x1, y0, y1, x2, x3, y2, y3;
- x0 = ld32x(x,i);
- y0 = ld32x(x,i);
- x1 = ld32x(x,i+1);
- y1 = ld32x(y,i+1);
- sum += (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;
- x2 = ld32x(x,i+2);
- y2 = ld32x(x,i+2);
- x3 = ld32x(x,i+3);
- y3 = ld32x(x,i+3);
- sum += (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;
- }
- } else
- {
- len >>= 3;
- while( len-- )
- {
- register int x0, x1, x2, x3, y0, y1, y2, y3;
- x0 = pack16lsb(x[0],x[1]);
- y0 = pack16lsb(y[0],y[1]);
- x1 = pack16lsb(x[2],x[3]);
- y1 = pack16lsb(y[2],y[3]);
- sum += (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;
- x2 = pack16lsb(x[4],x[5]);
- y2 = pack16lsb(y[4],y[5]);
- x3 = pack16lsb(x[6],x[7]);
- y3 = pack16lsb(y[6],y[7]);
- sum += (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;
- x += 8;
- y += 8;
- }
- }
- INNERPROD_STOP();
- return sum;
- }
- #define OVERRIDE_PITCH_XCORR
- void pitch_xcorr(const Int16 *_x, const Int16 *_y, Int32 *corr, int len, int nb_pitch, char *stack)
- {
- register int sum_1, sum_2, sum_3, sum_4;
- register int y10, y32, y54, y76, y21, y43, y65;
- register int x10, x32;
- register int i, j, k, limit;
-
- TMDEBUG_ALIGNMEM(_x);
- TMDEBUG_ALIGNMEM(_y);
- PITCHXCORR_START();
- limit = nb_pitch >> 1;
- len >>= 1;
- for (i=0 ; i<limit ; i+=2 )
- {
- sum_1 = sum_2 = sum_3 = sum_4 = 0;
-
- y10 = ld32x(_y,i);
- y32 = ld32x(_y,i+1);
-
- for ( j=0 ; j<len ; j+=2 )
- {
- x10 = ld32x(_x,j);
- x32 = ld32x(_x,j+1);
- y54 = ld32x(_y,i+j+2);
- y76 = ld32x(_y,i+j+3);
-
- sum_1 += (ifir16(x10,y10) + ifir16(x32,y32)) >> 6;
- sum_3 += (ifir16(x10,y32) + ifir16(x32,y54)) >> 6;
- y21 = funshift2(y32,y10);
- y43 = funshift2(y54,y32);
- y65 = funshift2(y76,y54);
- sum_2 += (ifir16(x10,y21) + ifir16(x32,y43)) >> 6;
- sum_4 += (ifir16(x10,y43) + ifir16(x32,y65)) >> 6;
- y10 = y54;
- y32 = y76;
- }
- k = i << 1;
- corr[nb_pitch-1-k]=sum_1;
- corr[nb_pitch-2-k]=sum_2;
- corr[nb_pitch-3-k]=sum_3;
- corr[nb_pitch-4-k]=sum_4;
- }
- #ifndef REMARK_ON
- (void)stack;
- #endif
- PITCHXCORR_STOP();
- }
- #ifndef ttisim
- #define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
- static int pitch_gain_search_3tap_vq
- (
- const signed char *gain_cdbk,
- int gain_cdbk_size,
- Int16 *C16,
- Int16 max_gain
- )
- {
- register int pp = 0x00400040, p=64;
- register int g10, g2, g20, g21, g02, g22, g01;
- register int cb0, cb1, cb2, cb5432;
- register int C10, C32, C54, C76, C98, C83, C2;
- register int acc0, acc1, acc2, acc3, sum, gsum, bsum=-VERY_LARGE32;
- register int i, best_cdbk=0;
- register Int16 tmp;
- TMDEBUG_ALIGNMEM(C16);
- TMDEBUG_ALIGNMEM(gain_cdbk+2);
- PITCHGAINSEARCH3TAPVQ_START();
- tmp = ild16(gain_cdbk);
- C98 = ld32x(C16,4);
- C32 = ld32x(C16,1);
- C10 = ld32(C16);
- C54 = ld32x(C16,2);
- C76 = ld32x(C16,3);
- cb0 = sex8(tmp);
- cb1 = sex8(tmp>>8);
- C83 = funshift2(C98,C32);
- C2 = sex16(C32);
- gain_cdbk += 2;
- #if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)
- #pragma TCS_unroll=4
- #pragma TCS_unrollexact=1
- #endif
- for ( i=0 ; i<gain_cdbk_size ; ++i )
- {
- cb5432 = ld32x(gain_cdbk,i);
- cb2 = sex8(cb5432);
- gsum = sex8(cb5432>>8);
- sum = 0;
-
- g10 = pack16lsb(cb1 + 32, cb0 + 32);
- g2 = cb2 + 32;
- g02 = pack16lsb(g10, g2);
- acc0 = dspidualmul(g10,pp);
- sum += ifir16(acc0,C10);
- sum += p * g2 * C2;
- g22 = pack16lsb(g02, g02);
- g01 = funshift2(g10, g10);
- acc1 = dspidualmul(g22, g01);
- sum -= ifir16(acc1, C54);
- acc2 = dspidualmul(g10, g10);
- sum -= ifir16(acc2, C76);
- g20 = pack16lsb(g2, g10);
- g21 = funshift2(g2, g10);
- acc3 = dspidualmul(g20, g21);
- sum -= ifir16(acc3, C83);
-
- if ( sum>bsum && gsum<=max_gain )
- { bsum = sum;
- best_cdbk=i;
- }
- cb0 = sex8(cb5432 >> 16);
- cb1 = asri(24,cb5432);
- }
- #if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)
- #pragma TCS_unrollexact=0
- #pragma TCS_unroll=0
- #endif
- PITCHGAINSEARCH3TAPVQ_STOP();
- return best_cdbk;
- }
- #endif
- #define OVERRIDE_COMPUTE_PITCH_ERROR
- #ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
- inline Int32 compute_pitch_error(Int16 *C, Int16 *g, Int16 pitch_control)
- {
- register int c10, c32, c54, c76, c98, c83;
- register int g10, g32, g02, g22, g01, g21, g20;
- register int pp, tmp0, tmp1, tmp2, tmp3;
- register int sum = 0;
-
- COMPUTEPITCHERROR_START();
- g10 = ld32(g);
- g32 = ld32x(g,1);
- pp = pack16lsb(pitch_control,pitch_control);
- c10 = ld32(C);
- c32 = ld32x(C,1);
- g02 = pack16lsb(g10,g32);
- g22 = pack16lsb(g32,g32);
- g01 = funshift2(g10,g10);
- tmp0 = dspidualmul(g10,pp);
- sum += ifir16(tmp0, c10);
- sum += pitch_control * sex16(g32) * sex16(c32);
- c54 = ld32x(C,2);
- c76 = ld32x(C,3);
- c98 = ld32x(C,4);
- tmp1 = dspidualmul(g22,g01);
- sum -= ifir16(tmp1, c54);
- tmp2 = dspidualmul(g10,g10);
- sum -= ifir16(tmp2,c76);
- c83 = funshift2(c98,c32);
- g20 = funshift2(g02,g02);
- g21 = funshift2(g02,g10);
- tmp3 = dspidualmul(g20,g21);
- sum -= ifir16(tmp3,c83);
- COMPUTEPITCHERROR_STOP();
- return sum;
- }
- #endif
- #define OVERRIDE_OPEN_LOOP_NBEST_PITCH
- void open_loop_nbest_pitch(Int16 *sw, int start, int end, int len, int *pitch, Int16 *gain, int N, char *stack)
- {
- VARDECL(int *best_score);
- VARDECL(int *best_ener);
- VARDECL(Int32 *corr);
- VARDECL(Int16 *corr16);
- VARDECL(Int16 *ener16);
- register int i, j, k, l, N4, N2;
- register int _sw10, _sw32, _s0, _s2, limit;
- register int *energy;
- register int cshift=0, eshift=0;
- register int scaledown = 0;
- register int e0, _energy0;
- ALLOC(corr16, end-start+1, Int16);
- ALLOC(ener16, end-start+1, Int16);
- ALLOC(corr, end-start+1, Int32);
- ALLOC(best_score, N, int);
- ALLOC(best_ener, N, int);
- energy = corr;
- N4 = N << 2;
- N2 = N >> 1;
- TMDEBUG_ALIGNMEM(sw);
- TMDEBUG_ALIGNMEM(pitch);
- TMDEBUG_ALIGNMEM(gain);
- TMDEBUG_ALIGNMEM(best_score);
- TMDEBUG_ALIGNMEM(best_ener);
- TMDEBUG_ALIGNMEM(corr16);
- TMDEBUG_ALIGNMEM(ener16);
- OPENLOOPNBESTPITCH_START();
- for ( i=0 ; i<N4 ; i+=4 )
- { st32d(i,best_score,-1);
- st32d(i,best_ener,0);
- st32d(i,pitch,start);
- }
- for ( j=asri(1,-end) ; j<N2 ; ++j )
- { register int _sw10;
- _sw10 = ld32x(sw,j);
- _sw10 = dspidualabs(_sw10);
- if ( _sw10 & 0xC000C000 )
- { scaledown = 1;
- break;
- }
- }
- if ( scaledown )
- {
- for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )
- { register int _sw10;
-
- _sw10 = ld32x(sw,j);
- _sw10 = dualasr(_sw10,1);
- st32d(k, sw, _sw10);
- }
- }
- energy[0] = _energy0 = inner_prod(sw-start, sw-start, len);
- e0 = inner_prod(sw, sw, len);
- j=asri(1,-start-1); k=j+20;
- _sw10 = ld32x(sw,j);
- _sw32 = ld32x(sw,k);
- limit = end-1-start;
- for ( i=1,--j,--k ; i<limit ; i+=2,--j,--k )
- { register int _energy1, __sw10, __sw32, __s0, __s2;
-
- _s0 = sex16(_sw10);
- _s2 = sex16(_sw32);
- _energy1 = (_energy0 + ((_s0 * _s0) >> 6)) - ((_s2 * _s2) >> 6);
- _energy0 = imax(0,_energy1);
- energy[i] = _energy0;
- __sw10 = ld32x(sw,j);
- __sw32 = ld32x(sw,k);
- __s0 = asri(16,__sw10);
- __s2 = asri(16,__sw32);
- _energy1 = (_energy0 + ((__s0 * __s0) >> 6)) - ((__s2 * __s2) >> 6);
- _energy0 = imax(0,_energy1);
- energy[i+1] = _energy0;
- _sw10 = __sw10;
- _sw32 = __sw32;
- }
- _s0 = sex16(_sw10);
- _s2 = sex16(_sw32);
- _energy0 = imax(0,(_energy0 + ((_s0 * _s0) >> 6)) - ((_s2 * _s2) >> 6));
- energy[i] = _energy0;
- eshift = normalize16(energy, ener16, 32766, end-start+1);
- /* In fixed-point, this actually overrites the energy array (aliased to corr) */
- pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
- /* Normalize to 180 so we can square it and it still fits in 16 bits */
- cshift = normalize16(corr, corr16, 180, end-start+1);
- /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
-
- if ( scaledown )
- {
- for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )
- { register int _sw10;
-
- _sw10 = ld32x(sw,j);
- _sw10 = dualasl(_sw10,1);
- st32d(k, sw, _sw10);
- }
- }
- /* Search for the best pitch prediction gain */
- for ( i=start,l=0 ; i<end ; i+=2,++l )
- { register int _corr16, _c0, _c1;
- register int _ener16, _e0, _e1;
- _corr16 = ld32x(corr16,l);
- _corr16 = dspidualmul(_corr16,_corr16);
- _c0 = sex16(_corr16);
- _c1 = asri(16,_corr16);
- _ener16 = ld32x(ener16,l);
- _ener16 = dspidualadd(_ener16,0x00010001);
- _e0 = sex16(_ener16);
- _e1 = asri(16,_ener16);
- /* Instead of dividing the tmp by the energy, we multiply on the other side */
-
- if ( (_c0 * best_ener[N-1]) > (best_score[N-1] * _e0) )
- {
- best_score[N-1] = _c0;
- best_ener[N-1] = _e0;
- pitch[N-1] = i;
- for( j=0 ; j<N-1 ; ++j )
- { if ( (_c0 * best_ener[j]) > best_score[j] * _e0 )
- { for( k=N-1 ; k>j ; --k )
- {
- best_score[k]=best_score[k-1];
- best_ener[k]=best_ener[k-1];
- pitch[k]=pitch[k-1];
- }
- best_score[j]=_c0;
- best_ener[j]=_e0;
- pitch[j]=i;
- break;
- }
- }
- }
- if ( (_c1 * best_ener[N-1]) > (best_score[N-1] * _e1) )
- {
- best_score[N-1] = _c1;
- best_ener[N-1] = _e1;
- pitch[N-1] = i+1;
- for( j=0 ; j<N-1 ; ++j )
- { if ( (_c1 * best_ener[j]) > best_score[j] * _e1 )
- { for( k=N-1 ; k>j ; --k )
- {
- best_score[k]=best_score[k-1];
- best_ener[k]=best_ener[k-1];
- pitch[k]=pitch[k-1];
- }
- best_score[j]=_c1;
- best_ener[j]=_e1;
- pitch[j]=i+1;
- break;
- }
- }
- }
- }
-
- /* Compute open-loop gain if necessary */
- if (gain)
- {
- for (j=0;j<N;j++)
- {
- spx_word16_t g;
- i=pitch[j];
- g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
- gain[j] = imax(0,g);
- }
- }
- OPENLOOPNBESTPITCH_STOP();
- }
- #endif
|