nb_celp.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /* Copyright (C) 2002-2006 Jean-Marc Valin */
  2. /**
  3. @file nb_celp.h
  4. @brief Narrowband CELP encoder/decoder
  5. */
  6. /*
  7. Redistribution and use in source and binary forms, with or without
  8. modification, are permitted provided that the following conditions
  9. are met:
  10. - Redistributions of source code must retain the above copyright
  11. notice, this list of conditions and the following disclaimer.
  12. - Redistributions in binary form must reproduce the above copyright
  13. notice, this list of conditions and the following disclaimer in the
  14. documentation and/or other materials provided with the distribution.
  15. - Neither the name of the Xiph.org Foundation nor the names of its
  16. contributors may be used to endorse or promote products derived from
  17. this software without specific prior written permission.
  18. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
  22. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  23. EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  24. PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  25. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  26. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  27. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. */
  30. #ifndef NB_CELP_H
  31. #define NB_CELP_H
  32. #include "modes.h"
  33. #include "speex/speex_callbacks.h"
  34. #include "vbr.h"
  35. #include "filters.h"
  36. #ifdef VORBIS_PSYCHO
  37. #include "vorbis_psy.h"
  38. #endif
  39. #define NB_ORDER 10 /**< Filter order: length of the LSP, LPC and filter-memory arrays in EncState/DecState */
  40. #define NB_FRAME_SIZE 160 /**< Size of one frame (presumably in samples; confirm) */
  41. #define NB_SUBFRAME_SIZE 40 /**< Size of one sub-frame (NB_FRAME_SIZE / NB_NB_SUBFRAMES) */
  42. #define NB_NB_SUBFRAMES 4 /**< Number of sub-frames per frame; length of the per-sub-frame arrays (pitch, pi_gain) */
  43. #define NB_PITCH_START 17 /**< Start of the pitch range (presumably the smallest pitch lag searched; confirm) */
  44. #define NB_PITCH_END 144 /**< End of the pitch range (presumably the largest pitch lag searched; confirm) */
  45. #define NB_WINDOW_SIZE (NB_FRAME_SIZE+NB_SUBFRAME_SIZE) /**< One frame plus one sub-frame (200); EncState.winBuf holds the NB_WINDOW_SIZE-NB_FRAME_SIZE extra part */
  46. #define NB_EXCBUF (NB_FRAME_SIZE+NB_PITCH_END+2) /**< Length of the encoder excBuf and swBuf arrays (306) */
  47. #define NB_DEC_BUFFER (NB_FRAME_SIZE+2*NB_PITCH_END+NB_SUBFRAME_SIZE+12) /**< Length of the decoder excBuf array (500) */
  48. /** Structure representing the full state of the narrowband encoder. All buffers are fixed-size members; the psychoacoustic members exist only with VORBIS_PSYCHO and the VBR/VAD/DTX/ABR members only without DISABLE_VBR. */
  49. typedef struct EncState {
  50. const SpeexMode *mode; /**< Mode corresponding to the state */
  51. int first; /**< Is this the first frame? */
  52. spx_word32_t cumul_gain; /**< Product of previously used pitch gains (Q10) */
  53. int bounded_pitch; /**< Next frame should not rely on previous frames for pitch */
  54. int ol_pitch; /**< Open-loop pitch */
  55. int ol_voiced; /**< Open-loop voiced/non-voiced decision */
  56. int pitch[NB_NB_SUBFRAMES]; /**< One pitch value per sub-frame (presumably the pitch lag selected for each sub-frame; confirm in nb_celp.c) */
  57. #ifdef VORBIS_PSYCHO
  58. VorbisPsy *psy; /**< Psychoacoustic model state (type from vorbis_psy.h) */
  59. float *psy_window; /**< Buffer used with the psychoacoustic model (presumably its analysis window; confirm) */
  60. float *curve; /**< Curve buffer for the psychoacoustic model (presumably the current masking curve; confirm) */
  61. float *old_curve; /**< Second curve buffer (presumably the curve of the previous frame; confirm) */
  62. #endif
  63. spx_word16_t gamma1; /**< Perceptual filter: A(z/gamma1) */
  64. spx_word16_t gamma2; /**< Perceptual filter: A(z/gamma2) */
  65. spx_word16_t lpc_floor; /**< Noise floor multiplier for A[0] in LPC analysis*/
  66. char *stack; /**< Pseudo-stack allocation for temporary memory */
  67. spx_word16_t winBuf[NB_WINDOW_SIZE-NB_FRAME_SIZE]; /**< Input buffer (original signal); NB_SUBFRAME_SIZE entries */
  68. spx_word16_t excBuf[NB_EXCBUF]; /**< Excitation buffer */
  69. spx_word16_t *exc; /**< Start of excitation frame */
  70. spx_word16_t swBuf[NB_EXCBUF]; /**< Weighted signal buffer */
  71. spx_word16_t *sw; /**< Start of weighted signal frame */
  72. const spx_word16_t *window; /**< Temporary (Hanning) window */
  73. const spx_word16_t *lagWindow; /**< Window applied to auto-correlation */
  74. spx_lsp_t old_lsp[NB_ORDER]; /**< LSPs for previous frame */
  75. spx_lsp_t old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */
  76. spx_mem_t mem_sp[NB_ORDER]; /**< Filter memory for signal synthesis */
  77. spx_mem_t mem_sw[NB_ORDER]; /**< Filter memory for perceptually-weighted signal */
  78. spx_mem_t mem_sw_whole[NB_ORDER]; /**< Filter memory for perceptually-weighted signal (whole frame)*/
  79. spx_mem_t mem_exc[NB_ORDER]; /**< Filter memory for excitation (whole frame) */
  80. spx_mem_t mem_exc2[NB_ORDER]; /**< Second filter memory for excitation (whole frame); how it differs from mem_exc is not visible here */
  81. spx_mem_t mem_hp[2]; /**< High-pass filter memory */
  82. spx_word32_t pi_gain[NB_NB_SUBFRAMES]; /**< Gain of LPC filter at theta=pi (fe/2), one per sub-frame */
  83. spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */
  84. #ifndef DISABLE_VBR
  85. VBRState vbr; /**< State of the VBR data */
  86. float vbr_quality; /**< Quality setting for VBR encoding */
  87. float relative_quality; /**< Relative quality that will be needed by VBR */
  88. spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
  89. spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */
  90. int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
  91. int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */
  92. int dtx_count; /**< Number of consecutive DTX frames */
  93. spx_int32_t abr_enabled; /**< ABR setting (in bps), 0 if off */
  94. float abr_drift; /**< ABR running state (presumably drift from the target bit-rate; confirm) */
  95. float abr_drift2; /**< Second ABR drift value (presumably a differently smoothed estimate; confirm) */
  96. float abr_count; /**< ABR running counter (presumably frames processed since ABR was enabled; confirm) */
  97. #endif /* #ifndef DISABLE_VBR */
  98. int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
  99. spx_int32_t sampling_rate; /**< Sampling rate setting (presumably in Hz; confirm) */
  100. int plc_tuning; /**< Packet-loss tuning setting (presumably the expected loss rate; confirm) */
  101. int encode_submode; /**< Presumably non-zero when the sub-mode ID is written to the bit-stream; confirm in nb_celp.c */
  102. const SpeexSubmode * const *submodes; /**< Sub-mode data */
  103. int submodeID; /**< Activated sub-mode */
  104. int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
  105. int isWideband; /**< Is this used as part of the embedded wideband codec */
  106. int highpass_enabled; /**< Is the input filter enabled */
  107. } EncState;
  108. /** Structure representing the full state of the narrowband decoder, including packet-loss concealment, callback and vocoder data. */
  109. typedef struct DecState {
  110. const SpeexMode *mode; /**< Mode corresponding to the state */
  111. int first; /**< Is this the first frame? */
  112. int count_lost; /**< Was the last frame lost? (the name suggests a count of lost frames rather than a flag; confirm) */
  113. spx_int32_t sampling_rate; /**< Sampling rate setting (presumably in Hz; confirm) */
  114. spx_word16_t last_ol_gain; /**< Open-loop gain for previous frame */
  115. char *stack; /**< Pseudo-stack allocation for temporary memory */
  116. spx_word16_t excBuf[NB_DEC_BUFFER]; /**< Excitation buffer */
  117. spx_word16_t *exc; /**< Start of excitation frame */
  118. spx_lsp_t old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */
  119. spx_coef_t interp_qlpc[NB_ORDER]; /**< Interpolated quantized LPCs */
  120. spx_mem_t mem_sp[NB_ORDER]; /**< Filter memory for synthesis signal */
  121. spx_mem_t mem_hp[2]; /**< High-pass filter memory */
  122. spx_word32_t pi_gain[NB_NB_SUBFRAMES]; /**< Gain of LPC filter at theta=pi (fe/2), one per sub-frame */
  123. spx_word16_t *innov_save; /**< If non-NULL, innovation is copied here */
  124. spx_word16_t level; /**< Signal level estimate (presumably of the decoded signal; confirm) */
  125. spx_word16_t max_level; /**< Upper level tracked alongside level (semantics not visible here; confirm) */
  126. spx_word16_t min_level; /**< Lower level tracked alongside level (semantics not visible here; confirm) */
  127. /* This is used in packet loss concealment */
  128. int last_pitch; /**< Pitch of last correctly decoded frame */
  129. spx_word16_t last_pitch_gain; /**< Pitch gain of last correctly decoded frame */
  130. spx_word16_t pitch_gain_buf[3]; /**< Pitch gain of last decoded frames */
  131. int pitch_gain_buf_idx; /**< Tail of the buffer */
  132. spx_uint32_t seed; /**< Seed used for random number generation */
  133. int encode_submode; /**< Presumably non-zero when the sub-mode ID is read from the bit-stream; confirm in nb_celp.c */
  134. const SpeexSubmode * const *submodes; /**< Sub-mode data */
  135. int submodeID; /**< Activated sub-mode */
  136. int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */
  137. SpeexCallback speex_callbacks[SPEEX_MAX_CALLBACKS]; /**< Callback table with SPEEX_MAX_CALLBACKS entries (presumably indexed by in-band request ID; confirm) */
  138. SpeexCallback user_callback; /**< Separate single callback (presumably for user in-band data; confirm) */
  139. /*Vocoder data*/
  140. spx_word16_t voc_m1; /**< Vocoder state (semantics not visible here) */
  141. spx_word32_t voc_m2; /**< Vocoder state (semantics not visible here) */
  142. spx_word16_t voc_mean; /**< Vocoder state (presumably a running mean; confirm) */
  143. int voc_offset; /**< Vocoder state (presumably a sample offset; confirm) */
  144. int dtx_enabled; /**< DTX flag (presumably 1 when DTX is in use, 0 otherwise; confirm) */
  145. int isWideband; /**< Is this used as part of the embedded wideband codec */
  146. int highpass_enabled; /**< Is the input filter enabled */
  147. } DecState;
  148. /** Initializes encoder state for mode m and returns it as an opaque pointer (presumably an EncState; confirm in nb_celp.c) */
  149. void *nb_encoder_init(const SpeexMode *m);
  150. /** De-allocates encoder state resources; state is the opaque encoder state (presumably the pointer returned by nb_encoder_init; confirm) */
  151. void nb_encoder_destroy(void *state);
  152. /** Encodes one frame: state is the opaque encoder state, in the input frame, bits the bit-stream object (presumably written to; the sample type of in and the meaning of the int return are not visible here; confirm in nb_celp.c) */
  153. int nb_encode(void *state, void *in, SpeexBits *bits);
  154. /** Initializes decoder state for mode m and returns it as an opaque pointer (presumably a DecState; confirm in nb_celp.c) */
  155. void *nb_decoder_init(const SpeexMode *m);
  156. /** De-allocates decoder state resources; state is the opaque decoder state (presumably the pointer returned by nb_decoder_init; confirm) */
  157. void nb_decoder_destroy(void *state);
  158. /** Decodes one frame: state is the opaque decoder state, bits the bit-stream object (presumably read from), out the output frame (the sample type of out and the meaning of the int return are not visible here; confirm in nb_celp.c) */
  159. int nb_decode(void *state, SpeexBits *bits, void *out);
  160. /** ioctl-like function for controlling a narrowband encoder: request selects the operation and ptr carries its argument (request codes and the meaning of the int return are not visible here; confirm in nb_celp.c) */
  161. int nb_encoder_ctl(void *state, int request, void *ptr);
  162. /** ioctl-like function for controlling a narrowband decoder: request selects the operation and ptr carries its argument (request codes and the meaning of the int return are not visible here; confirm in nb_celp.c) */
  163. int nb_decoder_ctl(void *state, int request, void *ptr);
  164. #endif