///////////////////////////////////////////////////////////////////// // // File: soundmix.c // Author: Adam Janin // adam@janin.org // 04/27/04 // // Copyright (C) 2004 International Computer Science Institute // // Originally from jsndmix.cc (02/21/00), which used internal ICSI // libraries for audio and statistics handling, and was written in C++. // // This version requires only libsndfile, which is licensed under the // Gnu Lesser General Public License, and is available at: // http://www.mega-nerd.com/libsndfile/ // // For more information on the ICSI Meeting Corpus, see: // http://www.icsi.berkeley.edu/Speech/mr // // This program is provided AS IS. It has not been extensively tested, // and we offer no support or warranty. // // // Given a set of audio files in many different formats, produce an // output audio file that is the mix of the input files. // // You can either provide a gain for each input audio file, or the // program can estimate the gain required to equalize the volume. // The autogain computation is quite primitive. See comments below. // // See usage() for command line arguments. // // This program should be very to easy to compile, as it only depends // on libsndfile and libm. On most UNIX systems: // // gcc -c soundmix.c // gcc -o soundmix soundmix.o -lsndfile -lm // // You may need to add -Idir to the first line and -Ldir to the second, // where dir points to libsndfile files (sndfile.h and libsndfile.a). // // // The autogain is computed by normalizing the input signals by their // standard deviation. This is computed by taking random samples of // size RandomSampleSize seconds until RandomSampleTime seconds have // been collected, and computing the stddev over that size. Note that // this requires lots of seeks, and will be very inefficient if the // stream is compressed or if RandomSampleTime is a large fraction of // the file size. You can specify a maximum gain. This helps prevent // very quiet channels from being boosted too much. You can also apply // a gain setting to the resulting output audio. Using a value less // than 1.0 helps prevent "clipping" when the signals are close to // saturation. // // RCS $Header: /n/www/export/htdocs/speech/mr/tools/RCS/soundmix.c,v 1.2 2004/08/16 20:36:51 janin Exp $ #include #include #include #include #include ////////////////////////////////////////////////////////////////////// // // Defines // #define MEMCHECK(x) \ if (!x) { \ fprintf(stderr, "Out of memory in %s line %d\n", \ __FILE__, __LINE__); \ exit(1); \ } ////////////////////////////////////////////////////////////////////// // // Globals // char* ProgName; int Verbose = 0; // See comments above int AutoGain = 0; float RandomSampleTime = 300.0; // 5 minutes float RandomSampleSize = 2.0; // 2 seconds // If autogain is used, the samples are picked randomly. Using // different random seeds will result is slightly different gains, // since the sampling will be different. int RandomSeed = 0; // Maximum gain to apply to any channel. <= 0 implies no max. float MaxGain = -1.0; // Buffer size (to hold one block of audio data while processing) int BlockSize = 8192; // Gain to apply to summed signal (after autogain or individual // gains). Use a value less than 1.0 if there's clipping in the // resulting file. float Gain = 1.0; ////////////////////////////////////////////////////////////////////// // // Prototypes // void usage(); void mix(int nfiles, SNDFILE** sounds, float* scales, SNDFILE* out); void auto_gain(int nfiles, SNDFILE** sounds, SF_INFO* sfinfo, float* scales); float std_snd(SNDFILE* in, SF_INFO* sfinfo); float std_snd_all(SNDFILE* in, SF_INFO* sfinfo); float std_snd_random(SNDFILE* in, SF_INFO* sfinfo, int sample_frames, long to_read); float my_atof(char*); int my_atoi(char*); ////////////////////////////////////////////////////////////////////// // // The following type and prototypes are an implementation of a // univariate statistics class (taken from jstats.cc). They are used // to compute the autogain. // typedef struct { double sum; // Sum of data points double sum2; // Sum of square of data points double min; // Smallest value double max; // Largest value int n; // Number of values } stat; stat* create_stat(); void delete_stat(); void stat_check(stat*, int); void stat_datum(stat*, double); double stat_mean(stat*); double stat_std(stat*); double stat_min(stat*); double stat_max(stat*); int stat_n(stat*); ////////////////////////////////////////////////////////////////////// // // Print command line usage and exit. // void usage() { fprintf(stderr, "\nUsage: %s -v -m maxgain -o outfile -g gain in1 sc1 in2 sc2 ... OR\n", ProgName); fprintf(stderr, " %s -a -v -m maxgain -o outfile -g gain in1 in2 ...\n", ProgName); fprintf(stderr, "where\n"); fprintf(stderr, " -o outfile Output file [stdout]\n"); fprintf(stderr, " -g gain Gain to apply to output file [1.0]\n"); fprintf(stderr, " -m maxgain Maximum gain to apply to any input file [-1.0]\n"); fprintf(stderr, " -a Compute gain (scales) automatically\n"); fprintf(stderr, " inN Input file\n"); fprintf(stderr, " scN Scale for input file (if -a isn't given)\n"); fprintf(stderr, "\n"); fprintf(stderr, " If maxgain is less than 0.0, then there is no limit to the gain.\n"); fprintf(stderr, "\n The following arguments are also allowed, but seldom needed:\n\n"); fprintf(stderr, " -s size Time in seconds of a single sample used to compute autogain [%f]\n", RandomSampleSize); fprintf(stderr, " -t time Total amount of time in seconds used to compute autogain [%f]\n", RandomSampleTime); fprintf(stderr, " -b size Number of samples to read at a time [%d]\n", BlockSize); fprintf(stderr, " -r seed Seed for random number generator (used by autogain computation) [%d]\n", RandomSeed); fprintf(stderr, "\n"); exit(1); } // usage() ////////////////////////////////////////////////////////////////////// // // Main // int main(int argc, char** argv) { extern char *optarg; extern int optind; int c, i, nargs, nfiles; char *fname; float *scales; // How much to scale the inputs SNDFILE* *sounds; SF_INFO *sfinfos; // Information about input audio files char* outfn = "-"; // - means stdout SNDFILE* out; ProgName = argv[0]; while ((c = getopt(argc, argv, "ab:g:m:o:r:s:t:v")) != EOF) { switch (c) { case 'a': AutoGain = 1; break; case 'b': BlockSize = my_atof(optarg); break; case 'g': Gain = my_atof(optarg); break; case 'm': MaxGain = my_atof(optarg); break; case 'o': outfn = strdup(optarg); break; case 'r': RandomSeed = my_atoi(optarg); break; case 's': RandomSampleSize = my_atof(optarg); break; case 't': RandomSampleTime = my_atof(optarg); break; case 'v': Verbose = 1; break; } } nargs = argc - optind; if (nargs < 1) { usage(); } // If autogain is NOT set, then the arguments must be file1 gain1 // file2 gain2 ..., and therefore there must be an even number of // arguments. if (AutoGain == 0) { if (nargs % 2 != 0) { usage(); } nfiles = nargs / 2; } else if (AutoGain == 1) { nfiles = nargs; } else { fprintf(stderr, "This should never happen!\n"); exit(-1); } sounds = (SNDFILE**) malloc(sizeof(SNDFILE*)*nfiles); MEMCHECK(sounds); scales = (float*) malloc(sizeof(float)*nfiles); MEMCHECK(scales); sfinfos = (SF_INFO*) malloc(sizeof(SF_INFO)*nfiles); MEMCHECK(sfinfos); for (i = 0; i < nfiles; i++) { if (AutoGain == 0) { fname = argv[optind + 2*i]; } else { fname = argv[optind+i]; } sounds[i] = sf_open(fname, SFM_READ, &(sfinfos[i])); if (sounds[i] == NULL) { fprintf(stderr, "%s: couldn't open '%s' as input sound file: %s\n", ProgName, fname, sf_strerror(NULL)); usage(); } if (sfinfos[i].channels != 1) { fprintf(stderr, "%s: Currently only one channel per file is supported.\n", ProgName); exit(1); } } // Now either compute or extract the scale factors if (AutoGain == 0) { for (i = 0; i < nfiles; i++) { scales[i] = my_atof(argv[optind + 2*i+1]); } } else if (AutoGain == 1) { auto_gain(nfiles, sounds, sfinfos, scales); } // Use sfinfos[0] so that the output will be the same format as the // FIRST input. out = sf_open(outfn, SFM_WRITE, &(sfinfos[0])); if (!out) { fprintf(stderr, "%s: Couldn't open output file %s: %s\n", ProgName, outfn, sf_strerror(NULL)); exit(1); } for (i = 0; i < nfiles; i++) { // If MaxGain is set, clip the gains to MaxGain if (MaxGain > 0.0 && scales[i] > MaxGain) { scales[i] = MaxGain; } // Apply output gain (applying it here is the same as applying it // to the output file) scales[i] *= Gain; if (Verbose) { fprintf(stderr, "scale[%d] = %f\n", i, scales[i]); } } // Do the work mix(nfiles, sounds, scales, out); // Clean up and exit for (i = 0; i < nfiles; i++) { sf_close(sounds[i]); } sf_close(out); free(sounds); free(scales); free(sfinfos); free(outfn); return 0; } // main() ////////////////////////////////////////////////////////////////////// // // Do the work. // // If the files do not have the same sizes, they will be left // justified, and the output will be the size of the longest input. // void mix(int nfiles, SNDFILE** sounds, float* scales, SNDFILE* out) { int i, j; long nread; long max_nread; // Biggest block read in any input float* buf; float* outbuf; int done = 0; int gotone; // True if you got ANY data from an input buf = (float*)malloc(sizeof(float)*BlockSize); MEMCHECK(buf); outbuf = (float*)malloc(sizeof(float)*BlockSize); MEMCHECK(outbuf); if (Verbose) { fprintf(stderr, "Starting mix...\n"); } while (!done) { for (j = 0; j < BlockSize; j++) { outbuf[j] = 0.0; } gotone = 0; max_nread = 0; for (i = 0; i < nfiles; i++) { nread = sf_read_float(sounds[i], buf, BlockSize); if (nread > 0) { gotone = 1; for (j = 0; j < nread; j++) { outbuf[j] += buf[j] * scales[i]; } if (nread > max_nread) max_nread = nread; } } if (gotone) { sf_write_float(out, outbuf, max_nread); } else { done = 1; } } free(buf); free(outbuf); if (Verbose) { fprintf(stderr, "Done.\n"); } } // mix() ////////////////////////////////////////////////////////////////////// // // Given the set of sound files, compute all the scaling factors so // that the loudest sound doesn't change, and all the others are // equalized. // // Note: The sounds are all rewound to the start as a side effect. // void auto_gain(int nfiles, SNDFILE** sounds, SF_INFO* sfinfos, float* scales) { int i; float minscale; if (Verbose) { fprintf(stderr, "Computing auto-gain...\n"); } scales[0] = 1.0 / std_snd(sounds[0], &(sfinfos[0])); minscale = scales[0]; for (i = 1; i < nfiles; i++) { scales[i] = 1.0 / std_snd(sounds[i], &(sfinfos[i])); if (minscale > scales[i]) minscale = scales[i]; } for (i = 0; i < nfiles; i++) { scales[i] /= minscale; sf_seek(sounds[i], 0, SEEK_SET); } } // auto_gain() ////////////////////////////////////////////////////////////////////// // // Compute the standard deviation of the given sound. // See comments in the header of this file for the algorithm and // efficiency concerns. // float std_snd(SNDFILE* in, SF_INFO* sfinfo) { float std; if (sfinfo->frames < RandomSampleTime * sfinfo->samplerate) { std = std_snd_all(in, sfinfo); } else { std = std_snd_random(in, sfinfo, RandomSampleTime*sfinfo->samplerate, RandomSampleSize*sfinfo->samplerate); } sf_seek(in, 0, SEEK_SET); // Rewind to the start of the snd return std; } // std_snd() ////////////////////////////////////////////////////////////////////// // // Compute the stddev of the entire file. // float std_snd_all(SNDFILE* in, SF_INFO* sfinfo) { float* buf; int i; int nread; double std; stat* s; buf = (float*) malloc(sizeof(float)*BlockSize); MEMCHECK(buf); s = create_stat(); do { nread = sf_read_float(in, buf, BlockSize); for (i = 0; i < nread; i++) { stat_datum(s, buf[i]); } } while (nread > 0); std = stat_std(s); delete_stat(s); free(buf); return std; } // std_snd_all() ////////////////////////////////////////////////////////////////////// // // Compute the approximate stddev of the input file by subsampling. // Each subsample is sample_frames long, and random sampling will // occur until to_read frames have been read. // // This is very inefficient if seeks are expensive (e.g. if the file // is compressed) or if total_frames is close to the file size. // float std_snd_random(SNDFILE* in, SF_INFO* sfinfo, int sample_frames, long to_read) { double std; stat* s; long nread; float* buf; int i; long random_frame; srand(RandomSeed); buf = (float*) malloc(sizeof(float)*sample_frames); MEMCHECK(buf); s = create_stat(); while (to_read > 0) { random_frame = ((long long) rand()) * sfinfo->frames / RAND_MAX; sf_seek(in, random_frame, SEEK_SET); nread = sf_read_float(in, buf, sample_frames); for (i = 0; i < nread; i++) { stat_datum(s, buf[i]); } to_read -= nread; } std = stat_std(s); delete_stat(s); free(buf); return std; } // std_snd_random() ////////////////////////////////////////////////////////////////////// // // Error checking versions of atof and atoi // float my_atof(char* in) { float out; if (sscanf(in, "%f", &out) != 1) { usage(); } return out; } // my_atof() int my_atoi(char* in) { int out; if (sscanf(in, "%d", &out) != 1) { usage(); } return out; } // my_atoi() ////////////////////////////////////////////////////////////////////// // // The following function are used to implement a univariate // statistics class (taken from jstat.cc). // ////////////////////////////////////////////////////////////////////// // // Create and return a new stat. // stat* create_stat() { stat* s; s = (stat*) malloc(sizeof(stat)); MEMCHECK(s); s->n = 0; s->min = 0; s->max = 0; s->sum = 0; s->sum2 = 0; return s; } // new_stat() void delete_stat(stat* s) { if (s) free(s); } // delete_stat() ////////////////////////////////////////////////////////////////////// // // Make sure the given stat is not null. // Make sure the given stat has at least n entries. // Otherwise, print errors and exit. // void stat_check(stat* s, int n) { if (!s) { fprintf(stderr, "%s: Stat not set!\n", ProgName); exit(1); } if (s->n < n) { fprintf(stderr, "%s: Not enough data to determine value (need %d, got %d)\n", ProgName, n, s->n); exit(1); } } // stat_check() void stat_datum(stat* s, double v) { stat_check(s, 0); s->sum += v; s->sum2 += v*v; s->n++; if (v < s->min) { s->min = v; } if (v > s->max) { s->max = v; } } // stat_datum() double stat_mean(stat* s) { stat_check(s, 1); return s->sum / s->n; } // stat_mean() double stat_std(stat* s) { stat_check(s, 2); return sqrt((s->sum2 - (s->sum * s->sum / s->n) ) / (s->n - 1)); } // stat_std() double stat_min(stat* s) { stat_check(s, 1); return s->min; } // stat_min() double stat_max(stat* s) { stat_check(s, 1); return s->max; } // stat_max() int stat_n(stat* s) { stat_check(s, 0); return s->n; } // stat_n()