vqtrainph.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /*--------------------------------------------------------------------------*\
  2. FILE........: vqtrainph.c
  3. AUTHOR......: David Rowe
  4. DATE CREATED: 27 July 2012
  5. This program trains phase vector quantisers. Modified from
  6. vqtrain.c
  7. \*--------------------------------------------------------------------------*/
  8. /*
  9. Copyright (C) 2012 David Rowe
  10. All rights reserved.
  11. This program is free software; you can redistribute it and/or modify
  12. it under the terms of the GNU Lesser General Public License version 2, as
  13. published by the Free Software Foundation. This program is
  14. distributed in the hope that it will be useful, but WITHOUT ANY
  15. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16. FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
  17. License for more details.
  18. You should have received a copy of the GNU Lesser General Public License
  19. along with this program; if not, see <http://www.gnu.org/licenses/>.
  20. */
  21. /*-----------------------------------------------------------------------*\
  22. INCLUDES
  23. \*-----------------------------------------------------------------------*/
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <math.h>
  28. #include <ctype.h>
  29. #include <assert.h>
  30. typedef struct {
  31. float real;
  32. float imag;
  33. } COMP;
  34. /*-----------------------------------------------------------------------* \
  35. DEFINES
  36. \*-----------------------------------------------------------------------*/
  37. #define DELTAQ 0.01 /* quiting distortion */
  38. #define MAX_STR 80 /* maximum string length */
  39. #define PI 3.141592654
  40. /*-----------------------------------------------------------------------*\
  41. FUNCTION PROTOTYPES
  42. \*-----------------------------------------------------------------------*/
  43. void zero(COMP v[], int d);
  44. void acc(COMP v1[], COMP v2[], int d);
  45. void norm(COMP v[], int k);
  46. int quantise(COMP cb[], COMP vec[], int d, int e, float *se);
  47. void print_vec(COMP cb[], int d, int e);
  48. /*-----------------------------------------------------------------------* \
  49. MAIN
  50. \*-----------------------------------------------------------------------*/
  51. int main(int argc, char *argv[]) {
  52. int d,e; /* dimension and codebook size */
  53. COMP *vec; /* current vector */
  54. COMP *cb; /* vector codebook */
  55. COMP *cent; /* centroids for each codebook entry */
  56. int *n; /* number of vectors in this interval */
  57. int J; /* number of vectors in training set */
  58. int ind; /* index of current vector */
  59. float se; /* total squared error for this iteration */
  60. float var; /* variance */
  61. float var_1; /* previous variance */
  62. float delta; /* improvement in distortion */
  63. FILE *ftrain; /* file containing training set */
  64. FILE *fvq; /* file containing vector quantiser */
  65. int ret;
  66. int i,j, finished, iterations;
  67. float b; /* equivalent number of bits */
  68. float improvement;
  69. float sd_vec, sd_element, sd_theory, bits_theory;
  70. int var_n;
  71. /* Interpret command line arguments */
  72. if (argc != 5) {
  73. printf("usage: %s TrainFile D(dimension) E(number of entries) VQFile\n", argv[0]);
  74. exit(1);
  75. }
  76. /* Open training file */
  77. ftrain = fopen(argv[1],"rb");
  78. if (ftrain == NULL) {
  79. printf("Error opening training database file: %s\n",argv[1]);
  80. exit(1);
  81. }
  82. /* determine k and m, and allocate arrays */
  83. d = atoi(argv[2]);
  84. e = atoi(argv[3]);
  85. printf("\n");
  86. printf("dimension D=%d number of entries E=%d\n", d, e);
  87. vec = (COMP*)malloc(sizeof(COMP)*d);
  88. cb = (COMP*)malloc(sizeof(COMP)*d*e);
  89. cent = (COMP*)malloc(sizeof(COMP)*d*e);
  90. n = (int*)malloc(sizeof(int)*e);
  91. if (cb == NULL || cb == NULL || cent == NULL || vec == NULL) {
  92. printf("Error in malloc.\n");
  93. exit(1);
  94. }
  95. /* determine size of training set */
  96. J = 0;
  97. var_n = 0;
  98. while(fread(vec, sizeof(COMP), d, ftrain) == (size_t)d) {
  99. for(j=0; j<d; j++)
  100. if ((vec[j].real != 0.0) && (vec[j].imag != 0.0))
  101. var_n++;
  102. J++;
  103. }
  104. printf("J=%d sparse vectors in training set, %d non-zero phases\n", J, var_n);
  105. /* set up initial codebook state from samples of training set */
  106. rewind(ftrain);
  107. ret = fread(cb, sizeof(COMP), d*e, ftrain);
  108. /* codebook can't have any zero phase angle entries, these need to be set to
  109. zero angle so cmult used to find phase angle differences works */
  110. for(i=0; i<d*e; i++)
  111. if ((cb[i].real == 0.0) && (cb[i].imag == 0.0)) {
  112. cb[i].real = 1.0;
  113. cb[i].imag = 0.0;
  114. }
  115. //print_vec(cb, d, 1);
  116. /* main loop */
  117. printf("\n");
  118. printf("Iteration delta var std dev\n");
  119. printf("--------------------------------\n");
  120. b = log10((float)e)/log10(2.0);
  121. sd_theory = (PI/sqrt(3.0))*pow(2.0, -b/(float)d);
  122. iterations = 0;
  123. finished = 0;
  124. delta = 0;
  125. var_1 = 0.0;
  126. do {
  127. /* zero centroids */
  128. for(i=0; i<e; i++) {
  129. zero(&cent[i*d], d);
  130. n[i] = 0;
  131. }
  132. /* quantise training set */
  133. se = 0.0;
  134. rewind(ftrain);
  135. for(i=0; i<J; i++) {
  136. ret = fread(vec, sizeof(COMP), d, ftrain);
  137. ind = quantise(cb, vec, d, e, &se);
  138. //printf("%d ", ind);
  139. n[ind]++;
  140. acc(&cent[ind*d], vec, d);
  141. }
  142. /* work out stats */
  143. var = se/var_n;
  144. sd_vec = sqrt(var);
  145. /* we need to know dimension of cb (which varies from vector to vector)
  146. to calc bits_theory. Maybe measure and use average dimension....
  147. */
  148. //bits_theory = d*log10(PI/(sd_element*sqrt(3.0)))/log10(2.0);
  149. //improvement = bits_theory - b;
  150. //print_vec(cent, d, 1);
  151. //print_vec(cb, d, 1);
  152. iterations++;
  153. if (iterations > 1) {
  154. if (var > 0.0) {
  155. delta = (var_1 - var)/var;
  156. }
  157. else
  158. delta = 0;
  159. if (delta < DELTAQ)
  160. finished = 1;
  161. }
  162. if (!finished) {
  163. /* determine new codebook from centroids */
  164. for(i=0; i<e; i++) {
  165. norm(&cent[i*d], d);
  166. memcpy(&cb[i*d], &cent[i*d], d*sizeof(COMP));
  167. }
  168. }
  169. printf("%2d %4.3f %4.3f %4.3f \n",iterations, delta, var, sd_vec);
  170. var_1 = var;
  171. } while (!finished);
  172. //print_vec(cb, d, 1);
  173. /* save codebook to disk */
  174. fvq = fopen(argv[4],"wt");
  175. if (fvq == NULL) {
  176. printf("Error opening VQ file: %s\n",argv[4]);
  177. exit(1);
  178. }
  179. fprintf(fvq,"%d %d\n",d,e);
  180. for(j=0; j<e; j++) {
  181. for(i=0; i<d; i++)
  182. fprintf(fvq,"% 4.3f ", atan2(cb[j*d+i].imag, cb[j*d+i].real));
  183. fprintf(fvq,"\n");
  184. }
  185. fclose(fvq);
  186. return 0;
  187. }
  188. /*-----------------------------------------------------------------------*\
  189. FUNCTIONS
  190. \*-----------------------------------------------------------------------*/
  191. void print_vec(COMP cb[], int d, int e)
  192. {
  193. int i,j;
  194. for(j=0; j<e; j++) {
  195. for(i=0; i<d; i++)
  196. printf("%f %f ", cb[j*d+i].real, cb[j*d+i].imag);
  197. printf("\n");
  198. }
  199. }
  200. static COMP cconj(COMP a)
  201. {
  202. COMP res;
  203. res.real = a.real;
  204. res.imag = -a.imag;
  205. return res;
  206. }
  207. static COMP cmult(COMP a, COMP b)
  208. {
  209. COMP res;
  210. res.real = a.real*b.real - a.imag*b.imag;
  211. res.imag = a.real*b.imag + a.imag*b.real;
  212. return res;
  213. }
  214. static COMP cadd(COMP a, COMP b)
  215. {
  216. COMP res;
  217. res.real = a.real + b.real;
  218. res.imag = a.imag + b.imag;
  219. return res;
  220. }
  221. /*---------------------------------------------------------------------------*\
  222. FUNCTION....: zero()
  223. AUTHOR......: David Rowe
  224. DATE CREATED: 23/2/95
  225. Zeros a vector of length d.
  226. \*---------------------------------------------------------------------------*/
  227. void zero(COMP v[], int d)
  228. {
  229. int i;
  230. for(i=0; i<d; i++) {
  231. v[i].real = 0.0;
  232. v[i].imag = 0.0;
  233. }
  234. }
  235. /*---------------------------------------------------------------------------*\
  236. FUNCTION....: acc()
  237. AUTHOR......: David Rowe
  238. DATE CREATED: 23/2/95
  239. Adds d dimensional vectors v1 to v2 and stores the result back
  240. in v1. We add them like vectors on the complex plane, summing
  241. the real and imag terms.
  242. An unused entry in a sparse vector has both the real and imag
  243. parts set to zero so won't affect the accumulation process.
  244. \*---------------------------------------------------------------------------*/
  245. void acc(COMP v1[], COMP v2[], int d)
  246. {
  247. int i;
  248. for(i=0; i<d; i++)
  249. v1[i] = cadd(v1[i], v2[i]);
  250. }
  251. /*---------------------------------------------------------------------------*\
  252. FUNCTION....: norm()
  253. AUTHOR......: David Rowe
  254. DATE CREATED: 23/2/95
  255. Normalises each element in d dimensional vector.
  256. \*---------------------------------------------------------------------------*/
  257. void norm(COMP v[], int d)
  258. {
  259. int i;
  260. float mag;
  261. for(i=0; i<d; i++) {
  262. mag = sqrt(v[i].real*v[i].real + v[i].imag*v[i].imag);
  263. if (mag == 0.0) {
  264. /* can't have zero cb entries as cmult will break in quantise().
  265. We effectively set sparese phases to an angle of 0. */
  266. v[i].real = 1.0;
  267. v[i].imag = 0.0;
  268. }
  269. else {
  270. v[i].real /= mag;
  271. v[i].imag /= mag;
  272. }
  273. }
  274. }
  275. /*---------------------------------------------------------------------------*\
  276. FUNCTION....: quantise()
  277. AUTHOR......: David Rowe
  278. DATE CREATED: 23/2/95
  279. Quantises vec by choosing the nearest vector in codebook cb, and
  280. returns the vector index. The squared error of the quantised vector
  281. is added to se.
  282. Unused entries in sparse vectors are ignored.
  283. \*---------------------------------------------------------------------------*/
  284. int quantise(COMP cb[], COMP vec[], int d, int e, float *se)
  285. {
  286. float error; /* current error */
  287. int besti; /* best index so far */
  288. float best_error; /* best error so far */
  289. int i,j;
  290. int ignore;
  291. COMP diff;
  292. besti = 0;
  293. best_error = 1E32;
  294. for(j=0; j<e; j++) {
  295. error = 0.0;
  296. for(i=0; i<d; i++) {
  297. ignore = (vec[i].real == 0.0) && (vec[i].imag == 0.0);
  298. if (!ignore) {
  299. diff = cmult(cb[j*d+i], cconj(vec[i]));
  300. error += pow(atan2(diff.imag, diff.real), 2.0);
  301. }
  302. }
  303. if (error < best_error) {
  304. best_error = error;
  305. besti = j;
  306. }
  307. }
  308. *se += best_error;
  309. return(besti);
  310. }