/////////////////////////////////////////////////////////////////////
//
// File: soundmix.c
// Author: Adam Janin
//         adam@janin.org
//         04/27/04
//
// Copyright (C) 2004 International Computer Science Institute
//
// Originally from jsndmix.cc (02/21/00), which used internal ICSI
// libraries for audio and statistics handling, and was written in C++.
//
// This version requires only libsndfile, which is licensed under the 
// Gnu Lesser General Public License, and is available at:
//   http://www.mega-nerd.com/libsndfile/
//
// For more information on the ICSI Meeting Corpus, see:
//   http://www.icsi.berkeley.edu/Speech/mr
//
// This program is provided AS IS. It has not been extensively tested,
// and we offer no support or warranty.
//


//
// Given a set of audio files in many different formats, produce an
// output audio file that is the mix of the input files.
//
// You can either provide a gain for each input audio file, or the
// program can estimate the gain required to equalize the volume.
// The autogain computation is quite primitive. See comments below.
//
// See usage() for command line arguments.
//
// This program should be very easy to compile, as it only depends
// on libsndfile and libm. On most UNIX systems:
//
// gcc -c soundmix.c
// gcc -o soundmix soundmix.o -lsndfile -lm
//
// You may need to add -Idir to the first line and -Ldir to the second,
// where dir points to libsndfile files (sndfile.h and libsndfile.a).
//


//
// The autogain is computed by normalizing the input signals by their
// standard deviation. This is computed by taking random samples of
// size RandomSampleSize seconds until RandomSampleTime seconds have
// been collected, and computing the stddev over that size. Note that
// this requires lots of seeks, and will be very inefficient if the
// stream is compressed or if RandomSampleTime is a large fraction of
// the file size. You can specify a maximum gain. This helps prevent
// very quiet channels from being boosted too much. You can also apply
// a gain setting to the resulting output audio. Using a value less
// than 1.0 helps prevent "clipping" when the signals are close to
// saturation.
// 


// RCS $Header: /n/www/export/htdocs/speech/mr/tools/RCS/soundmix.c,v 1.2 2004/08/16 20:36:51 janin Exp $


#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sndfile.h>

//////////////////////////////////////////////////////////////////////
//
// Defines
//

// Abort with a diagnostic if the allocation result x is null (false).
//
// The argument is parenthesized so that expressions are negated as a
// whole, and the body is wrapped in do { } while (0) so that
// "MEMCHECK(p);" behaves as a single statement everywhere, including
// as the unbraced body of an if/else.
#define MEMCHECK(x)                                                \
do {                                                               \
  if (!(x)) {                                                      \
    fprintf(stderr, "Out of memory in %s line %d\n",               \
            __FILE__, __LINE__);                                   \
    exit(1);                                                       \
  }                                                                \
} while (0)


//////////////////////////////////////////////////////////////////////
//
// Globals
//

// Name the program was invoked with (set from argv[0] in main);
// used as a prefix in error messages.
char* ProgName;
// Non-zero enables progress messages on stderr (set by -v).
int Verbose = 0;

// See comments above
// Non-zero means the scales are computed automatically (set by -a).
int AutoGain = 0;
// Total amount of audio, in seconds, sampled to compute the autogain (-t).
float RandomSampleTime = 300.0;	// 5 minutes
// Length, in seconds, of a single random sample (-s).
float RandomSampleSize = 2.0;	// 2 seconds

// If autogain is used, the samples are picked randomly. Using
// different random seeds will result in slightly different gains,
// since the sampling will be different.
int RandomSeed = 0;

// Maximum gain to apply to any channel. <= 0 implies no max.
float MaxGain = -1.0;

// Buffer size (to hold one block of audio data while processing)
int BlockSize = 8192;

// Gain to apply to summed signal (after autogain or individual
// gains). Use a value less than 1.0 if there's clipping in the
// resulting file.
float Gain = 1.0;

//////////////////////////////////////////////////////////////////////
//
// Prototypes
//

// Print the command line help to stderr and exit with status 1.
void usage();

// Sum the inputs, each multiplied by its scale, and write the result
// to out.
void mix(int nfiles, SNDFILE** sounds, float* scales, SNDFILE* out);
// Fill scales[] with gains derived from each input's standard
// deviation; the inputs are rewound afterwards.
void auto_gain(int nfiles, SNDFILE** sounds, SF_INFO* sfinfo, float* scales);
// Standard deviation of one input: uses the whole file when it is
// shorter than RandomSampleTime seconds, random subsampling otherwise.
float std_snd(SNDFILE* in, SF_INFO* sfinfo);
// Standard deviation over every sample of the file.
float std_snd_all(SNDFILE* in, SF_INFO* sfinfo);
// Approximate standard deviation from randomly placed chunks of
// sample_frames frames, until to_read frames have been read.
float std_snd_random(SNDFILE* in, SF_INFO* sfinfo,
		     int sample_frames, long to_read);
// String-to-number conversions that call usage() when parsing fails.
float my_atof(char*);
int   my_atoi(char*);

//////////////////////////////////////////////////////////////////////
//
//  The following type and prototypes are an implementation of a
//  univariate statistics class (taken from jstats.cc). They are used
//  to compute the autogain.
//

// Running univariate statistics accumulator. Values are fed in one at
// a time with stat_datum(); the mean, standard deviation, extrema and
// count can then be queried.
typedef struct {
  double sum;		//  Sum of data points
  double sum2;		//  Sum of square of data points
  double min;		//  Smallest value
  double max;		//  Largest value
  int    n;		//  Number of values
} stat;

// Every declaration below is a real prototype. An empty parameter
// list "()" leaves the arguments unchecked in C99/C11 and means
// "(void)" in C23, where "void delete_stat();" would conflict with
// the one-argument definition.
stat* create_stat(void);		// Allocate a zeroed accumulator
void  delete_stat(stat*);		// Release an accumulator
void stat_check(stat*, int);		// Exit unless non-null with >= n values
void stat_datum(stat*, double);		// Add one value
double stat_mean(stat*);		// Arithmetic mean (needs >= 1 value)
double stat_std(stat*);			// Sample std deviation (needs >= 2 values)
double stat_min(stat*);			// Smallest value (needs >= 1 value)
double stat_max(stat*);			// Largest value (needs >= 1 value)
int stat_n(stat*);			// Number of values added

//////////////////////////////////////////////////////////////////////
//
// Print command line usage and exit.
//

void usage() {
  FILE* err = stderr;

  // The two accepted invocation forms.
  fprintf(err,
          "\nUsage: %s -v -m maxgain -o outfile -g gain in1 sc1 in2 sc2 ...  OR\n",
          ProgName);
  fprintf(err,
          "       %s -a -v -m maxgain -o outfile -g gain in1 in2 ...\n",
          ProgName);

  // Fixed help text for the common options (contains no conversions).
  fputs("where\n"
        " -o outfile  Output file [stdout]\n"
        " -g gain     Gain to apply to output file [1.0]\n"
        " -m maxgain  Maximum gain to apply to any input file [-1.0]\n"
        " -a          Compute gain (scales) automatically\n"
        " inN         Input file\n"
        " scN         Scale for input file (if -a isn't given)\n"
        "\n"
        "    If maxgain is less than 0.0, then there is no limit to the gain.\n"
        "\n The following arguments are also allowed, but seldom needed:\n\n",
        err);

  // Rarely used options, shown together with their current values.
  fprintf(err,
          " -s size     Time in seconds of a single sample used to compute autogain [%f]\n",
          RandomSampleSize);
  fprintf(err,
          " -t time     Total amount of time in seconds used to compute autogain [%f]\n",
          RandomSampleTime);
  fprintf(err,
          " -b size     Number of samples to read at a time [%d]\n",
          BlockSize);
  fprintf(err,
          " -r seed     Seed for random number generator (used by autogain computation) [%d]\n",
          RandomSeed);
  fputs("\n", err);

  exit(1);
}  // usage()


//////////////////////////////////////////////////////////////////////
//
// Main
//

// Parse the command line, open the inputs, determine the per-input
// scales (given explicitly or computed by auto_gain), open the output
// with the format of the first input, and mix.
//
// Returns 0 on success; exits with a non-zero status on any error.
int main(int argc, char** argv) {
  extern char *optarg;
  extern int optind;
  // getopt is declared here, alongside optarg/optind, because this
  // file does not include <unistd.h>. Calling it undeclared is an
  // error in C99 and later.
  extern int getopt(int, char* const*, const char*);

  int c, i, nargs, nfiles;
  int status = 0;
  char *fname;

  float *scales;		// How much to scale the inputs
  SNDFILE* *sounds;
  SF_INFO *sfinfos;		// Information about input audio files
  const char* outfn = "-";	// - means stdout
  SNDFILE* out;
  
  ProgName = argv[0];

  // getopt returns -1 (not necessarily EOF) when the options run out.
  while ((c = getopt(argc, argv, "ab:g:m:o:r:s:t:v")) != -1) {
    switch (c) {
    case 'a':
      AutoGain = 1;
      break;
    case 'b':
      // The block size is a sample count, so parse it as an integer.
      BlockSize = my_atoi(optarg);
      break;
    case 'g':
      Gain = my_atof(optarg);
      break;
    case 'm':
      MaxGain = my_atof(optarg);
      break;
    case 'o':
      // optarg points into argv, which outlives main; no copy needed.
      outfn = optarg;
      break;
    case 'r':
      RandomSeed = my_atoi(optarg);
      break;
    case 's':
      RandomSampleSize = my_atof(optarg);
      break;
    case 't':
      RandomSampleTime = my_atof(optarg);
      break;
    case 'v':
      Verbose = 1;
      break;
    default:
      // Unknown option or missing option argument.
      usage();
    }
  }

  // Zero or negative sizes would produce empty buffers and loops that
  // never make progress.
  if (BlockSize <= 0 || RandomSampleSize <= 0.0 || RandomSampleTime <= 0.0) {
    fprintf(stderr, "%s: -b, -s and -t require values greater than zero.\n",
	    ProgName);
    usage();
  }

  nargs = argc - optind;

  if (nargs < 1) {
    usage();
  }

  // If autogain is NOT set, then the arguments must be file1 gain1
  // file2 gain2 ..., and therefore there must be an even number of
  // arguments. 

  if (AutoGain) {
    nfiles = nargs;
  } else {
    if (nargs % 2 != 0) {
      usage();
    }
    nfiles = nargs / 2;
  }

  sounds = (SNDFILE**) malloc(sizeof(SNDFILE*)*nfiles);
  MEMCHECK(sounds);

  scales = (float*) malloc(sizeof(float)*nfiles);
  MEMCHECK(scales);

  // libsndfile requires SF_INFO.format to be zero when opening a file
  // for reading, so the array must start out zeroed.
  sfinfos = (SF_INFO*) calloc(nfiles, sizeof(SF_INFO));
  MEMCHECK(sfinfos);
  
  for (i = 0; i < nfiles; i++) {
    if (AutoGain == 0) {
      fname = argv[optind + 2*i];
    } else {
      fname = argv[optind+i];
    }
    
    sounds[i] = sf_open(fname, SFM_READ, &(sfinfos[i]));
    if (sounds[i] == NULL) {
      fprintf(stderr, "%s: couldn't open '%s' as input sound file: %s\n",
	      ProgName, fname, sf_strerror(NULL));
      usage();
    }

    if (sfinfos[i].channels != 1) {
      fprintf(stderr, "%s: Currently only one channel per file is supported.\n",
	      ProgName);
      exit(1);
    }
  }

  // Now either compute or extract the scale factors

  if (AutoGain == 0) {
    for (i = 0; i < nfiles; i++) {
      scales[i] = my_atof(argv[optind + 2*i+1]);
    }
  } else {
    auto_gain(nfiles, sounds, sfinfos, scales);
  }

  // Use sfinfos[0] so that the output will be the same format as the
  // FIRST input.

  out = sf_open(outfn, SFM_WRITE, &(sfinfos[0]));
  
  if (!out) {
    fprintf(stderr, "%s: Couldn't open output file %s: %s\n",
	    ProgName, outfn, sf_strerror(NULL));
    exit(1);
  }
  
  for (i = 0; i < nfiles; i++) {

    // If MaxGain is set, clip the gains to MaxGain
    if (MaxGain > 0.0 && scales[i] > MaxGain) {
      scales[i] = MaxGain;
    }

    // Apply output gain (applying it here is the same as applying it
    // to the output file)
    scales[i] *= Gain;

    if (Verbose) {
      fprintf(stderr, "scale[%d] = %f\n", i, scales[i]);
    }
  }

  // Do the work

  mix(nfiles, sounds, scales, out);

  // Clean up and exit

  for (i = 0; i < nfiles; i++) {
    sf_close(sounds[i]);
  }
  // Closing flushes the output; a failure here means the file may be
  // incomplete, so report it through the exit status.
  if (sf_close(out) != 0) {
    fprintf(stderr, "%s: error closing output file %s\n", ProgName, outfn);
    status = 1;
  }
  free(sounds);
  free(scales);
  free(sfinfos);
  // outfn is never heap-allocated (string literal or argv entry), so
  // it must not be freed.
    
  return status;
}  // main()

//////////////////////////////////////////////////////////////////////
//
// Do the work.
//
// If the files do not have the same sizes, they will be left
// justified, and the output will be the size of the longest input.
//

// Mix the inputs block by block.
//
//   nfiles  number of entries in sounds[] and scales[]
//   sounds  open input files, read from their current position
//   scales  gain applied to each input before summing
//   out     open output file
//
// Each pass reads up to BlockSize samples from every input, adds the
// scaled samples into an output block, and writes as many samples as
// the longest read of that pass. The loop ends when no input returns
// any data. Exits with status 1 if the output cannot be written.
void mix(int nfiles, SNDFILE** sounds, float* scales, SNDFILE* out) {
  int i;
  long j;
  long nread;
  long max_nread;		//  Biggest block read in any input
  float* buf;
  float* outbuf;

  buf = (float*)malloc(sizeof(float)*BlockSize);
  MEMCHECK(buf);
  
  outbuf = (float*)malloc(sizeof(float)*BlockSize);
  MEMCHECK(outbuf);
  
  if (Verbose) {
    fprintf(stderr, "Starting mix...\n");
  }

  for (;;) {
    // Start every pass from silence; inputs that have run out simply
    // contribute nothing.
    for (j = 0; j < BlockSize; j++) {
      outbuf[j] = 0.0;
    }
    max_nread = 0;
    for (i = 0; i < nfiles; i++) {
      nread = sf_read_float(sounds[i], buf, BlockSize);
      if (nread > 0) {
	for (j = 0; j < nread; j++) {
	  outbuf[j] += buf[j] * scales[i];
	}
	if (nread > max_nread) max_nread = nread;
      }
    }
    if (max_nread == 0) {
      break;			// No input produced any data: done
    }
    // A short write (e.g. disk full) would otherwise silently produce
    // a truncated output file.
    if (sf_write_float(out, outbuf, max_nread) != max_nread) {
      fprintf(stderr, "%s: error writing output: %s\n",
	      ProgName, sf_strerror(out));
      exit(1);
    }
  }
  free(buf);
  free(outbuf);
  
  if (Verbose) {
    fprintf(stderr, "Done.\n");
  }
}  // mix()

//////////////////////////////////////////////////////////////////////
//
// Given the set of sound files, compute all the scaling factors so
// that the loudest sound doesn't change, and all the others are
// equalized.
//
// Note: The sounds are all rewound to the start as a side effect.
//

// Compute one scale per input so that every input ends up with the
// same standard deviation as the loudest one.
//
//   nfiles   number of inputs
//   sounds   open input files
//   sfinfos  libsndfile info for each input
//   scales   output: gain for each input
//
// The raw scale of an input is 1/stddev; all scales are then divided
// by the smallest one, so the loudest input gets a scale of 1. An
// input whose standard deviation is not a positive number (e.g. pure
// silence) cannot be equalized: it is left out of the normalization
// and gets a scale of 1, instead of an infinite or NaN gain that
// would corrupt the whole mix.
//
// Every input is rewound to its start before returning.
void auto_gain(int nfiles, SNDFILE** sounds, SF_INFO* sfinfos, float* scales) {
  int i;
  int have_min = 0;		// True once minscale holds a real value
  float minscale = 1.0;
  float std;

  if (Verbose) {
    fprintf(stderr, "Computing auto-gain...\n");
  }

  for (i = 0; i < nfiles; i++) {
    std = std_snd(sounds[i], &(sfinfos[i]));
    if (std > 0.0) {
      scales[i] = 1.0 / std;
      if (!have_min || scales[i] < minscale) {
	minscale = scales[i];
	have_min = 1;
      }
    } else {
      // Zero or NaN stddev: mark with 0, which no valid scale can be.
      scales[i] = 0.0;
    }
  }
  for (i = 0; i < nfiles; i++) {
    if (scales[i] > 0.0) {
      scales[i] /= minscale;
    } else {
      scales[i] = 1.0;
    }
    sf_seek(sounds[i], 0, SEEK_SET);
  }
}  // auto_gain()

//////////////////////////////////////////////////////////////////////
//
// Compute the standard deviation of the given sound.
// See comments in the header of this file for the algorithm and
// efficiency concerns.
//

// Return the standard deviation of the samples of one input.
//
// A file shorter than RandomSampleTime seconds is measured in full.
// Longer files are subsampled: chunks of RandomSampleSize seconds are
// read at random positions until RandomSampleTime seconds have been
// collected. The file is rewound to its start before returning.
float std_snd(SNDFILE* in, SF_INFO* sfinfo) {
  float std;
  
  if (sfinfo->frames < RandomSampleTime * sfinfo->samplerate) {
    std = std_snd_all(in, sfinfo);
  } else {
    // std_snd_random takes the size of ONE chunk first and the TOTAL
    // number of frames to read second.
    std = std_snd_random(in, sfinfo,
			 RandomSampleSize*sfinfo->samplerate,
			 RandomSampleTime*sfinfo->samplerate);
  }
  sf_seek(in, 0, SEEK_SET);	// Rewind to the start of the snd
  return std;
}  // std_snd()

//////////////////////////////////////////////////////////////////////
//
// Compute the stddev of the entire file.
//

// Read the file from its current position to the end, BlockSize
// samples at a time, and return the standard deviation of everything
// that was read. sfinfo is not used.
float std_snd_all(SNDFILE* in, SF_INFO* sfinfo) {
  float* block = (float*) malloc(sizeof(float)*BlockSize);
  stat* acc;
  double result;
  int got;
  int k;

  MEMCHECK(block);

  acc = create_stat();

  // Keep pulling blocks until a read yields nothing.
  while ((got = sf_read_float(in, block, BlockSize)) > 0) {
    for (k = 0; k < got; k++) {
      stat_datum(acc, block[k]);
    }
  }

  result = stat_std(acc);
  delete_stat(acc);
  free(block);
  return result;
}  // std_snd_all()

//////////////////////////////////////////////////////////////////////
//
// Compute the approximate stddev of the input file by subsampling.
// Each subsample is sample_frames long, and random sampling will
// occur until to_read frames have been read.
//
// This is very inefficient if seeks are expensive (e.g. if the file
// is compressed) or if to_read is close to the file size.
//

// Estimate the standard deviation of a file from random chunks.
//
//   in             open input file (left at an arbitrary position)
//   sfinfo         libsndfile info for the file; frames is used to
//                  pick the random positions
//   sample_frames  size of one chunk, in frames; must be positive
//   to_read        total number of frames to collect
//
// The generator is re-seeded with RandomSeed on every call, so the
// same sequence of positions is used for every file.
//
// Exits with status 1 if sample_frames is not positive. If the file
// persistently yields no data, sampling stops with a warning instead
// of looping forever, and the estimate is computed from whatever was
// collected.
float std_snd_random(SNDFILE* in, SF_INFO* sfinfo,
                     int sample_frames, long to_read) {
  enum { MaxEmptyReads = 1000 };	// Consecutive empty reads tolerated
  double std;
  stat* s;
  long nread;
  float* buf;
  long i;
  long long random_frame;	// 64 bits even where long is 32 bits
  int empty_reads = 0;

  if (sample_frames <= 0) {
    // A zero-sized chunk could never reduce to_read.
    fprintf(stderr, "%s: autogain sample size must be at least one frame\n",
            ProgName);
    exit(1);
  }

  srand(RandomSeed);

  buf = (float*) malloc(sizeof(float)*sample_frames);
  MEMCHECK(buf);
  s = create_stat();
  
  while (to_read > 0) {
    random_frame = ((long long) rand()) * sfinfo->frames / RAND_MAX;
    sf_seek(in, random_frame, SEEK_SET);
    nread = sf_read_float(in, buf, sample_frames);
    if (nread <= 0) {
      // An occasional empty read is normal (position at end of file);
      // an unbroken run of them means no progress is possible.
      if (++empty_reads >= MaxEmptyReads) {
        fprintf(stderr, "%s: warning: giving up autogain sampling, no data could be read\n",
                ProgName);
        break;
      }
      continue;
    }
    empty_reads = 0;
    for (i = 0; i < nread; i++) {
      stat_datum(s, buf[i]);
    }
    to_read -= nread;
  }
  std = stat_std(s);
  delete_stat(s);
  free(buf);
  return std;
}  // std_snd_random()

//////////////////////////////////////////////////////////////////////
//
// Error checking versions of atof and atoi
//

// Convert the whole string to a float.
//
// Calls usage() (which exits) if the string does not start with a
// number, has trailing characters after the number, or overflows the
// range of float. Leading white space is accepted.
float my_atof(char* in) {
  char* end;
  float out;

  errno = 0;
  out = strtof(in, &end);
  if (end == in || *end != '\0') {
    usage();			// Not a number, or trailing garbage
  }
  if (errno == ERANGE && (out == HUGE_VALF || out == -HUGE_VALF)) {
    usage();			// Magnitude too large for a float
  }
  return out;
}  // my_atof()

// Convert the whole string to an int (base 10).
//
// Calls usage() (which exits) if the string does not start with a
// number, has trailing characters after the number, or does not fit
// in an int. Leading white space is accepted.
int my_atoi(char* in) {
  char* end;
  long value;

  errno = 0;
  value = strtol(in, &end, 10);
  if (end == in || *end != '\0') {
    usage();			// Not a number, or trailing garbage
  }
  if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
    usage();			// Out of range for an int
  }
  return (int) value;
}  // my_atoi()


//////////////////////////////////////////////////////////////////////
//
// The following functions are used to implement a univariate
// statistics class (taken from jstat.cc).
//

//////////////////////////////////////////////////////////////////////
//
// Create and return a new stat.
//

stat* create_stat() {
  stat* s;
  s = (stat*) malloc(sizeof(stat));
  MEMCHECK(s);
  s->n = 0;
  s->min = 0;
  s->max = 0;
  s->sum = 0;
  s->sum2 = 0;
  return s;
}  // new_stat()

// Release an accumulator obtained from create_stat(). A null pointer
// is accepted and ignored (free() itself does nothing for null).
void delete_stat(stat* s) {
  free(s);
}  // delete_stat()

//////////////////////////////////////////////////////////////////////
//
// Make sure the given stat is not null.
// Make sure the given stat has at least n entries.
// Otherwise, print errors and exit.
//

// Validate an accumulator before use: it must be non-null and hold at
// least n values. On failure an error is printed and the program
// exits with status 1.
void stat_check(stat* s, int n) {
  // Fast path: everything is in order.
  if (s != NULL && s->n >= n) {
    return;
  }

  if (s == NULL) {
    fprintf(stderr, "%s: Stat not set!\n", ProgName);
  } else {
    fprintf(stderr, "%s: Not enough data to determine value (need %d, got %d)\n",
            ProgName, n, s->n);
  }
  exit(1);
}  // stat_check()

// Add one value to the accumulator, updating the running sums, the
// count and the extrema.
//
// The first value always becomes both the minimum and the maximum.
// Comparing it against the zero-initialized fields would report a
// minimum of 0 for all-positive data and a maximum of 0 for
// all-negative data.
void stat_datum(stat* s, double v) {
  stat_check(s, 0);
  if (s->n == 0 || v < s->min) {
    s->min = v;
  }
  if (s->n == 0 || v > s->max) {
    s->max = v;
  }
  s->sum += v;
  s->sum2 += v*v;
  s->n++;
}  // stat_datum()

// Arithmetic mean of the values added so far. At least one value is
// required; otherwise stat_check() terminates the program.
double stat_mean(stat* s) {
  double total;

  stat_check(s, 1);
  total = s->sum;
  return total / s->n;
}  // stat_mean()

// Sample standard deviation (n - 1 in the denominator) of the values
// added so far. At least two values are required; otherwise
// stat_check() terminates the program.
//
// The sum of squared deviations is computed as sum2 - sum*sum/n. For
// (nearly) constant data, rounding can make that difference slightly
// negative, which would turn the result into NaN, so it is clamped at
// zero.
double stat_std(stat* s) {
  double ss;

  stat_check(s, 2);
  ss = s->sum2 - (s->sum * s->sum / s->n);
  if (ss < 0.0) {
    ss = 0.0;
  }
  return sqrt(ss / (s->n - 1));
}  // stat_std()

// Return the stored minimum. At least one value is required;
// otherwise stat_check() terminates the program.
double stat_min(stat* s) {
  double smallest;

  stat_check(s, 1);
  smallest = s->min;
  return smallest;
}  // stat_min()

// Return the stored maximum. At least one value is required;
// otherwise stat_check() terminates the program.
double stat_max(stat* s) {
  double largest;

  stat_check(s, 1);
  largest = s->max;
  return largest;
}  // stat_max()

// Return how many values have been added. The accumulator must be
// non-null; otherwise stat_check() terminates the program.
int stat_n(stat* s) {
  int count;

  stat_check(s, 0);
  count = s->n;
  return count;
}  // stat_n()