vqtrainjnd.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. /*--------------------------------------------------------------------------*\
  2. FILE........: vqtrainjnd.c
  3. AUTHOR......: David Rowe
  4. DATE CREATED: 10 Nov 2011
  5. This program trains vector quantisers for LSPs using an
  6. experimental, but very simple Just Noticeable Difference (JND)
  7. algorithm:
  8. - we quantise each training vector to JND steps (say 100Hz for LSPs
  9. 5-10)
  10. - we then use the most popular training vectors as our VQ codebook
  11. \*--------------------------------------------------------------------------*/
  12. /*
  13. Copyright (C) 2011 David Rowe
  14. All rights reserved.
  15. This program is free software; you can redistribute it and/or modify
  16. it under the terms of the GNU Lesser General Public License version 2, as
  17. published by the Free Software Foundation. This program is
  18. distributed in the hope that it will be useful, but WITHOUT ANY
  19. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  20. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  21. License for more details.
  22. You should have received a copy of the GNU Lesser General Public License
  23. along with this program; if not, see <http://www.gnu.org/licenses/>.
  24. */
  25. /*-----------------------------------------------------------------------*\
  26. INCLUDES
  27. \*-----------------------------------------------------------------------*/
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  33. /*-----------------------------------------------------------------------*\
  34. DEFINES
  35. \*-----------------------------------------------------------------------*/
  36. #define PI 3.141592654 /* mathematical constant */
  37. #define MAX_POP 10
  38. /*-----------------------------------------------------------------------*\
  39. FUNCTION PROTOTYPES
  40. \*-----------------------------------------------------------------------*/
  41. void zero(float v[], int k);
  42. void acc(float v1[], float v2[], int k);
  43. void norm(float v[], int k, long n);
  44. void locate_lsps_jnd_steps(float lsps[], float step, int k);
  45. /*-----------------------------------------------------------------------* \
  46. MAIN
  47. \*-----------------------------------------------------------------------*/
  48. int main(int argc, char *argv[]) {
  49. int k; /* dimension and codebook size */
  50. float *vec; /* current vector */
  51. int *n; /* number of vectors in this interval */
  52. int J; /* number of vectors in training set */
  53. int i,j;
  54. FILE *ftrain; /* file containing training set */
  55. float *train; /* training database */
  56. //float *pend_train; /* last entry */
  57. float *pt;
  58. int ntrain, match, vec_exists, vec_index=0, entry;
  59. int popular[MAX_POP], pop_thresh;
  60. FILE *fvq;
  61. float jnd;
  62. /* Interpret command line arguments */
  63. if (argc != 6) {
  64. printf("usage: %s TrainFile K(dimension) JND popThresh VQFile\n",
  65. argv[0]);
  66. exit(1);
  67. }
  68. /* Open training file */
  69. ftrain = fopen(argv[1],"rb");
  70. if (ftrain == NULL) {
  71. printf("Error opening training database file: %s\n",argv[1]);
  72. exit(1);
  73. }
  74. /* determine k and m, and allocate arrays */
  75. k = atol(argv[2]);
  76. jnd = atof(argv[3]);
  77. pop_thresh = atol(argv[4]);
  78. printf("dimension K=%d popThresh=%d JND=%3.1f Hz\n",
  79. k, pop_thresh, jnd);
  80. vec = (float*)malloc(sizeof(float)*k);
  81. if (vec == NULL) {
  82. printf("Error in malloc.\n");
  83. exit(1);
  84. }
  85. /* determine size of training set */
  86. J = 0;
  87. while(fread(vec, sizeof(float), k, ftrain) == (size_t)k)
  88. J++;
  89. printf("J=%d entries in training set\n", J);
  90. train = (float*)malloc(sizeof(float)*k*J);
  91. if (train == NULL) {
  92. printf("Error in malloc.\n");
  93. exit(1);
  94. }
  95. printf("training array is %d bytes\n", sizeof(float)*k*J);
  96. n = (int*)malloc(sizeof(int)*J);
  97. if (n == NULL) {
  98. printf("Error in malloc.\n");
  99. exit(1);
  100. }
  101. for(i=0; i<J; i++)
  102. n[i] = 0;
  103. /* now load up train data base and quantise */
  104. rewind(ftrain);
  105. ntrain = 0;
  106. entry = 0;
  107. while(fread(vec, sizeof(float), k, ftrain) == (size_t)k) {
  108. /* convert to Hz */
  109. for(j=0; j<k; j++)
  110. vec[j] *= 4000.0/PI;
  111. /* quantise to JND steps */
  112. locate_lsps_jnd_steps(vec, jnd, k);
  113. /* see if a match already exists in database */
  114. pt = train;
  115. vec_exists = 0;
  116. for(i=0; i<ntrain; i++) {
  117. match = 1;
  118. for(j=0; j<k; j++)
  119. if (vec[j] != pt[j])
  120. match = 0;
  121. if (match) {
  122. vec_exists = 1;
  123. vec_index = i;
  124. }
  125. pt += k;
  126. }
  127. if (vec_exists)
  128. n[vec_index]++;
  129. else {
  130. /* add to database */
  131. for(j=0; j<k; j++) {
  132. train[ntrain*k + j] = vec[j];
  133. }
  134. ntrain++;
  135. }
  136. entry++;
  137. if ((entry % 100) == 0)
  138. printf("\rtrain input vectors: %d unique vectors: %d",
  139. entry, ntrain);
  140. }
  141. printf("\n");
  142. for(i=0; i<MAX_POP; i++)
  143. popular[i] = 0;
  144. for(i=0; i<ntrain; i++) {
  145. if (n[i] < MAX_POP)
  146. popular[n[i]]++;
  147. }
  148. for(i=0; i<MAX_POP; i++)
  149. printf("popular[%d] = %d\n", i, popular[i]);
  150. /* dump result */
  151. fvq = fopen(argv[5],"wt");
  152. if (fvq == NULL) {
  153. printf("Error opening VQ file: %s\n",argv[4]);
  154. exit(1);
  155. }
  156. fprintf(fvq,"%d %d\n", k, popular[pop_thresh]);
  157. for(i=0; i<ntrain; i++) {
  158. if (n[i] > pop_thresh) {
  159. for(j=0; j<k; j++)
  160. fprintf(fvq, "%4.1f ",train[i*k+j]);
  161. fprintf(fvq,"\n");
  162. }
  163. }
  164. fclose(fvq);
  165. return 0;
  166. }
  167. /*-----------------------------------------------------------------------*\
  168. FUNCTIONS
  169. \*-----------------------------------------------------------------------*/
  170. /*---------------------------------------------------------------------------*\
  171. FUNCTION....: locate_lsps_jnd_steps()
  172. AUTHOR......: David Rowe
  173. DATE CREATED: 27/10/2011
  174. Applies a form of Bandwidth Expansion (BW) to a vector of LSPs.
  175. Listening tests have determined that "quantising" the position of
  176. each LSP (say to 100Hz steps for LSPs 5..10) introduces a "just
  177. noticable difference" in the synthesised speech.
  178. This operation can be used before quantisation to limit the input
  179. data to the quantiser to a number of discrete steps.
  180. \*---------------------------------------------------------------------------*/
  181. void locate_lsps_jnd_steps(float lsps[], float step, int k)
  182. {
  183. int i;
  184. for(i=0; i<k; i++) {
  185. lsps[i] = floor(lsps[i]/step + 0.5)*step;
  186. if (i) {
  187. if (lsps[i] == lsps[i-1])
  188. lsps[i] += step;
  189. }
  190. }
  191. }