123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491 |
- /*--------------------------------------------------------------------------*\
- FILE........: vqtrainsp.c
- AUTHOR......: David Rowe
- DATE CREATED: 7 August 2012
- This program trains sparse amplitude vector quantisers.
- Modified from vqtrainph.c
- \*--------------------------------------------------------------------------*/
- /*
- Copyright (C) 2012 David Rowe
- All rights reserved.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License version 2, as
- published by the Free Software Foundation. This program is
- distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
- /*-----------------------------------------------------------------------*\
- INCLUDES
- \*-----------------------------------------------------------------------*/
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <ctype.h>
- #include <assert.h>
- typedef struct {
- float real;
- float imag;
- } COMP;
- /*-----------------------------------------------------------------------* \
- DEFINES
- \*-----------------------------------------------------------------------*/
- #define DELTAQ 0.01 /* quiting distortion */
- #define MAX_STR 80 /* maximum string length */
- /*-----------------------------------------------------------------------*\
- FUNCTION PROTOTYPES
- \*-----------------------------------------------------------------------*/
- void zero(float v[], int d);
- void acc(float v1[], float v2[], int d);
- void norm(float v[], int k, int n[]);
- int quantise(float cb[], float vec[], int d, int e, float *se);
- void print_vec(float cb[], int d, int e);
- void split(float cb[], int d, int b);
- int gain_shape_quantise(float cb[], float vec[], int d, int e, float *se, float *best_gain);
- /*-----------------------------------------------------------------------* \
- MAIN
- \*-----------------------------------------------------------------------*/
- int main(int argc, char *argv[]) {
- int d,e; /* dimension and codebook size */
- float *vec; /* current vector */
- float *cb; /* vector codebook */
- float *cent; /* centroids for each codebook entry */
- int *n; /* number of vectors in this interval */
- int J; /* number of vectors in training set */
- int ind; /* index of current vector */
- float se; /* total squared error for this iteration */
- float var; /* variance */
- float var_1; /* previous variance */
- float delta; /* improvement in distortion */
- FILE *ftrain; /* file containing training set */
- FILE *fvq; /* file containing vector quantiser */
- int ret;
- int i,j, finished, iterations;
- float sd;
- int var_n, bits, b, levels;
- /* Interpret command line arguments */
- if (argc < 5) {
- printf("usage: %s TrainFile D(dimension) B(number of bits) VQFile [error.txt file]\n", argv[0]);
- exit(1);
- }
- /* Open training file */
- ftrain = fopen(argv[1],"rb");
- if (ftrain == NULL) {
- printf("Error opening training database file: %s\n",argv[1]);
- exit(1);
- }
- /* determine k and m, and allocate arrays */
- d = atoi(argv[2]);
- bits = atoi(argv[3]);
- e = 1<<bits;
- printf("\n");
- printf("dimension D=%d number of bits B=%d entries E=%d\n", d, bits, e);
- vec = (float*)malloc(sizeof(float)*d);
- cb = (float*)malloc(sizeof(float)*d*e);
- cent = (float*)malloc(sizeof(float)*d*e);
- n = (int*)malloc(sizeof(int)*d*e);
- if (cb == NULL || cb == NULL || cent == NULL || vec == NULL) {
- printf("Error in malloc.\n");
- exit(1);
- }
- /* determine size of training set */
- J = 0;
- var_n = 0;
- while(fread(vec, sizeof(float), d, ftrain) == (size_t)d) {
- for(j=0; j<d; j++)
- if (vec[j] != 0.0)
- var_n++;
- J++;
- }
- printf("J=%d sparse vectors in training set, %d non-zero values\n", J, var_n);
- /* set up initial codebook from centroid of training set */
- //#define DBG
- zero(cent, d);
- for(j=0; j<d; j++)
- n[j] = 0;
- rewind(ftrain);
- #ifdef DBG
- printf("initial codebook...\n");
- #endif
- for(i=0; i<J; i++) {
- ret = fread(vec, sizeof(float), d, ftrain);
- #ifdef DBG
- print_vec(vec, d, 1);
- #endif
- acc(cent, vec, d);
- for(j=0; j<d; j++)
- if (vec[j] != 0.0)
- n[j]++;
- }
- norm(cent, d, n);
- memcpy(cb, cent, d*sizeof(float));
- #ifdef DBG
- printf("\n");
- print_vec(cb, d, 1);
- #endif
- /* main loop */
- printf("\n");
- printf("bits Iteration delta var std dev\n");
- printf("---------------------------------------\n");
- for(b=1; b<=bits; b++) {
- levels = 1<<b;
- iterations = 0;
- finished = 0;
- delta = 0;
- var_1 = 0.0;
- split(cb, d, levels/2);
- //print_vec(cb, d, levels);
- do {
- /* zero centroids */
- for(i=0; i<levels; i++) {
- zero(¢[i*d], d);
- for(j=0; j<d; j++)
- n[i*d+j] = 0;
- }
- //#define DBG
- #ifdef DBG
- printf("cb...\n");
- print_vec(cb, d, levels);
- printf("\n\nquantise...\n");
- #endif
- /* quantise training set */
- se = 0.0;
- rewind(ftrain);
- for(i=0; i<J; i++) {
- ret = fread(vec, sizeof(float), d, ftrain);
- ind = quantise(cb, vec, d, levels, &se);
- //ind = gain_shape_quantise(cb, vec, d, levels, &se, &best_gain);
- //for(j=0; j<d; j++)
- // if (vec[j] != 0.0)
- // vec[j] += best_gain;
- #ifdef DBG
- print_vec(vec, d, 1);
- printf(" ind %d se: %f\n", ind, se);
- #endif
- acc(¢[ind*d], vec, d);
- for(j=0; j<d; j++)
- if (vec[j] != 0.0)
- n[ind*d+j]++;
- }
-
- #ifdef DBG
- printf("cent...\n");
- print_vec(cent, d, e);
- printf("\n");
- #endif
- /* work out stats */
- var = se/var_n;
- sd = sqrt(var);
- iterations++;
- if (iterations > 1) {
- if (var > 0.0) {
- delta = (var_1 - var)/var;
- }
- else
- delta = 0;
- if (delta < DELTAQ)
- finished = 1;
- }
-
- if (!finished) {
- /* determine new codebook from centroids */
- for(i=0; i<levels; i++) {
- norm(¢[i*d], d, &n[i*d]);
- memcpy(&cb[i*d], ¢[i*d], d*sizeof(float));
- }
- }
- #ifdef DBG
- printf("new cb ...\n");
- print_vec(cent, d, e);
- printf("\n");
- #endif
- printf("%2d %2d %4.3f %6.3f %4.3f\r",b,iterations, delta, var, sd);
- fflush(stdout);
- var_1 = var;
- } while (!finished);
- printf("\n");
- }
-
- //print_vec(cb, d, 1);
-
- /* save codebook to disk */
- fvq = fopen(argv[4],"wt");
- if (fvq == NULL) {
- printf("Error opening VQ file: %s\n",argv[4]);
- exit(1);
- }
- fprintf(fvq,"%d %d\n",d,e);
- for(j=0; j<e; j++) {
- for(i=0; i<d; i++)
- fprintf(fvq,"% 7.3f ", cb[j*d+i]);
- fprintf(fvq,"\n");
- }
- fclose(fvq);
- /* optionally dump error file for multi-stage work */
- if (argc == 6) {
- FILE *ferr = fopen(argv[5],"wt");
- assert(ferr != NULL);
- rewind(ftrain);
- for(i=0; i<J; i++) {
- ret = fread(vec, sizeof(float), d, ftrain);
- ind = quantise(cb, vec, d, levels, &se);
- for(j=0; j<d; j++) {
- if (vec[j] != 0.0)
- vec[j] -= cb[ind*d+j];
- fprintf(ferr, "%f ", vec[j]);
- }
- fprintf(ferr, "\n");
- }
- }
- return 0;
- }
- /*-----------------------------------------------------------------------*\
- FUNCTIONS
- \*-----------------------------------------------------------------------*/
- void print_vec(float cb[], int d, int e)
- {
- int i,j;
- for(j=0; j<e; j++) {
- printf(" ");
- for(i=0; i<d; i++)
- printf("% 7.3f ", cb[j*d+i]);
- printf("\n");
- }
- }
- /*---------------------------------------------------------------------------*\
- FUNCTION....: zero()
- AUTHOR......: David Rowe
- DATE CREATED: 23/2/95
- Zeros a vector of length d.
- \*---------------------------------------------------------------------------*/
- void zero(float v[], int d)
- {
- int i;
- for(i=0; i<d; i++) {
- v[i] = 0.0;
- }
- }
- /*---------------------------------------------------------------------------*\
- FUNCTION....: acc()
- AUTHOR......: David Rowe
- DATE CREATED: 23/2/95
- Adds d dimensional vectors v1 to v2 and stores the result back
- in v1.
- An unused entry in a sparse vector is set to zero so won't
- affect the accumulation process.
- \*---------------------------------------------------------------------------*/
- void acc(float v1[], float v2[], int d)
- {
- int i;
- for(i=0; i<d; i++)
- v1[i] += v2[i];
- }
- /*---------------------------------------------------------------------------*\
- FUNCTION....: norm()
- AUTHOR......: David Rowe
- DATE CREATED: 23/2/95
- Normalises each element in d dimensional vector.
- \*---------------------------------------------------------------------------*/
- void norm(float v[], int d, int n[])
- {
- int i;
- for(i=0; i<d; i++) {
- if (n[i] != 0)
- v[i] /= n[i];
- }
- }
- /*---------------------------------------------------------------------------*\
- FUNCTION....: quantise()
- AUTHOR......: David Rowe
- DATE CREATED: 23/2/95
- Quantises vec by choosing the nearest vector in codebook cb, and
- returns the vector index. The squared error of the quantised vector
- is added to se.
- Unused entries in sparse vectors are ignored.
- \*---------------------------------------------------------------------------*/
- int quantise(float cb[], float vec[], int d, int e, float *se)
- {
- float error; /* current error */
- int besti; /* best index so far */
- float best_error; /* best error so far */
- int i,j;
- float diff;
- besti = 0;
- best_error = 1E32;
- for(j=0; j<e; j++) {
- error = 0.0;
- for(i=0; i<d; i++) {
- if (vec[i] != 0.0) {
- diff = cb[j*d+i] - vec[i];
- error += diff*diff;
- }
- }
- if (error < best_error) {
- best_error = error;
- besti = j;
- }
- }
- *se += best_error;
- return(besti);
- }
- int gain_shape_quantise(float cb[], float vec[], int d, int e, float *se, float *best_gain)
- {
- float error; /* current error */
- int besti; /* best index so far */
- float best_error; /* best error so far */
- int i,j,m;
- float diff, metric, best_metric, gain, sumAm, sumCb;
- besti = 0;
- best_metric = best_error = 1E32;
- for(j=0; j<e; j++) {
- /* compute optimum gain */
- sumAm = sumCb = 0.0;
- m = 0;
- for(i=0; i<d; i++) {
- if (vec[i] != 0.0) {
- m++;
- sumAm += vec[i];
- sumCb += cb[j*d+i];
- }
- }
- gain = (sumAm - sumCb)/m;
-
- /* compute error */
- metric = error = 0.0;
- for(i=0; i<d; i++) {
- if (vec[i] != 0.0) {
- diff = vec[i] - cb[j*d+i] - gain;
- error += diff*diff;
- metric += diff*diff;
- }
- }
- if (metric < best_metric) {
- best_error = error;
- best_metric = metric;
- *best_gain = gain;
- besti = j;
- }
- }
- *se += best_error;
- return(besti);
- }
- void split(float cb[], int d, int levels)
- {
- int i,j;
- for (i=0;i<levels;i++) {
- for (j=0;j<d;j++) {
- float delta = .01*(rand()/(float)RAND_MAX-.5);
- cb[i*d+j] += delta;
- cb[(i+levels)*d+j] = cb[i*d+j] - delta;
- }
- }
- }
|