// SBD.cpp : A minimal implementation of two methods for computing symmetrical centroids:
//
// - The symmetrical Kullback-Leibler (SKL) centroid following:
// "The Centroid of the Symmetrical Kullback-Leibler Distance" by Raymond VELDHUIS
//  IEEE signal processing letters, 9 (3). pp. 96-99
//
// and
//
// - The centroid of symmetrized Bregman divergences (including the above SKL) following:
// "On the Centroids of Symmetrized Bregman Divergences" by Frank NIELSEN and Richard NOCK 
//  Sony CSL Technical Report, 2007
//
// (c) March-November 2007 Frank NIELSEN (Frank.Nielsen@acm.org)
// All rights reserved 

// Debugged and cross-checked on November 7th 2007

#include "stdafx.h"
#include <math.h>
#include <windows.h>
using namespace std;

// Variables for timing
DWORD startt1,stopt1,startt2,stopt2;
double dt1,dt2;


// Image histograms are stored as unit vector points
#define DIM 256

// Ensure a non-zero random number in (0,1) so that
// Kullback-Leibler divergence will be always bounded.
inline double drand()
{
	return 1.0e-5+(double)rand ()/(double)RAND_MAX ; 
}

//
// Basic point class
//
class Point
{
public:
int d; // dimension
double *coord; // array of coordinates

// Set to zero
void zero()
{
	for(int i=0;i<d;i++) 
		   coord[i]=0.0;
}

// Normalize to unit vector (histogram)
Point* normalized()
{
double s=0;
Point* result=new Point(d);

for(int i=0;i<d;i++) {s+=coord[i];}

if (s!=0.0) 
	{for(int i=0;i<d;i++) result->coord[i]=coord[i]/s;}

return result;
}

double sum()
{
double s=0;

for(int i=0;i<d;i++) {s+=coord[i];}
return s;
}

void unit()
{
double s=sum();
if (s!=0.0) {for(int i=0;i<d;i++) coord[i]=coord[i]/s;}

}


// Constructor
Point()
{
d=0;
coord=NULL;
}

// Constructor: random point set with all non-zero coordinates, normalized to one
Point(int dd)
{
	d=dd;
	coord=new double[d];

for(int i=0;i<d;i++) 
	coord[i]=drand();

unit();
}

// Destructor
~Point()
{
	if (coord!=NULL) {delete [] coord;}
}

// Save in .data file for display with Gnuplot
void Save(char *filename)
{
FILE *f=fopen(filename,"w");

for(int i=0;i<d;i++)
	fprintf(f,"%d\t%e\n",i,coord[i]);

fclose(f);
}

// Read point from .data file
Point(char *filename)
{
FILE *f=fopen(filename,"r");
int tmp;
d=256;
coord=new double[d];

for(int i=0;i<d;i++)
	fscanf(f,"%d\t%lf",&tmp,&coord[i]);

fclose(f);
}

// Assume at least 2D points (binary histograms)
friend ostream & operator<<(ostream &output, const Point &p)
{int i;

if (p.d>0){
output<<"["<<p.d<<"] (";

for(i=0;i<p.d-1;i++)
	output<<p.coord[i]<<",";

output<<p.coord[p.d-1]<<")";
}
else output<<"(0D point)";
return output;
}
};

// Inner product of two points
double InnerProduct(Point* p, Point* q)
{
int i;
double r=0.0;

for(i=0;i<p->d;i++) 
	r+=p->coord[i]*q->coord[i];

return r;
}

// The J-divergence, a particular symmetrized Bregman divergence in disguise
double SymmetrizedKullbackLeibler(Point* p,Point* q)
{
double r=0.0;

for(int i=0;i<p->d;i++)
{
	r+= ( p->coord[i]*log(p->coord[i]/q->coord[i])+ q->coord[i]*log(q->coord[i]/p->coord[i]) );
}

return 0.5*r;
}

// A class for storing point set
class Set
{
public: 
int n;
Point **array;

// constructor
Set(int nn, int dd)
{
	n=nn; 
	array=new Point*[n];
	//cout<<"Created point array"<<endl;
	for(int i=0;i<n;i++) 
			array[i]=new Point(dd);
}

~Set(){
	if (array!=NULL) 
		{for(int i=0;i<n;i++) delete array[i]; delete [] array;}
}


friend ostream & operator<<(ostream &output, const Set set)
{int i;

output<<"["<<set.n<<"] (";

for(i=0;i<set.n;i++)
	output<<*set.array[i]<<endl;

return output;
}


};

// Functions for computing generalized means

// Function for the arithmetic mean
Point* Identity(Point* p)
{
	Point *result=new Point(p->d);
	for(int i=0;i<p->d;i++) 
		result->coord[i]=p->coord[i];

	return result;
}

// Functions for the geometric mean
Point* Log(Point* p)
{
	Point *result=new Point(p->d);
	for(int i=0;i<p->d;i++) 
		result->coord[i]=log(p->coord[i]);
	
	return result;
}

Point* Exp(Point* p)
{
Point *result=new Point(p->d);
	for(int i=0;i<p->d;i++) 
		result->coord[i]=exp(p->coord[i]);
	
	return result;
}


//
// Compute generalized means also called quasi-arithmetic or f-means
//
Point* GeneralizedMeans(Set* set, Point*(* FunctionGrad)(Point*), Point*(*FunctionGradinv)(Point*))
{
int i,j;
int d=set->array[0]->d;
double w=(1.0/(double)(set->n));
Point* result=new Point(set->array[0]->d);
Point* addp;

result->zero();

for(i=0;i<set->n;i++)
	{
	addp=FunctionGrad(set->array[i]);

	// Centroid: uniform weight vector w (barycenters allow arbitrary unit vectors)
	for(j=0;j<d;j++) 
			result->coord[j]+= w*addp->coord[j];

	delete addp;
	}

return (*FunctionGradinv)(result);
}

//
// The minimum average is the Jensen-Shannon difference, a Burbea-Rao divergence
//
double InformationRadius(Set* set, Point* centroid)
{
double r=0.0;

for(int i=0;i<set->n;i++) 
	r+=SymmetrizedKullbackLeibler(set->array[i],centroid);

return r/(double)(set->n);
}

//
// Veldhuis' symmetrical Kullback-Leibler centroid algorithm 
// IEEE Signal Processing Letters, 2002
// Proceeds by convex programming using a renormalization step
//
Point* SKLConvexPrograming(Set* set)
{
Point* arithmetic, *geometric, *normalizedgeometric, *sklcentroid;
Point* x,*y;
double c, sum;
int k,d;

d=set->array[0]->d;
x=new Point(d); y=new Point(d); sklcentroid=new Point(d);

// Initialization
arithmetic=GeneralizedMeans(set, Identity, Identity);
geometric=GeneralizedMeans(set, Log, Exp);
normalizedgeometric=geometric->normalized();

c=-1;

// Main loop
for(int loop1=1;loop1<10;loop1++)
{

	for(k=0;k<d;k++)
	{
		y->coord[k]=arithmetic->coord[k]/(normalizedgeometric->coord[k]*exp(c));
		x->coord[k]=1.0;
	}

	// First inner loop: adjust x 
	for(int loop2=1;loop2<5;loop2++)
	{
	for(k=0;k<d;k++)
		x->coord[k]=x->coord[k]-(x->coord[k]*log(x->coord[k])-y->coord[k])/(1.0+log(x->coord[k]));
	}

	// Second inner loop: adjust parameter c
	for(int loop3=1;loop3<5;loop3++)
	{
		sum=0.0;
		for(k=0;k<d;k++)
				sum+=x->coord[k]*normalizedgeometric->coord[k]*exp(c);

		c=c-(sum-1.0)/sum;
	}

}

//
// Update the centroid
//
for(k=0;k<d;k++)
	sklcentroid->coord[k]=x->coord[k]*normalizedgeometric->coord[k]*exp(c);

// Clean
delete arithmetic;
delete geometric;
delete normalizedgeometric;
delete x;
delete y;

return sklcentroid;
}

//
// Second method: Symmetrized Bregman divergences
//


// Functions for handling natural parameters of multinomials (of the exponential family)
Point* MultinomialGradF(Point* theta) 
{
double s=0.0;
Point * result=new Point(theta->d);

for(int i=0;i<theta->d;i++)
		s+=( exp(theta->coord[i]) );

for(i=0;i<theta->d;i++)
	result->coord[i]=exp(theta->coord[i])/(1.0+s);

return result;
}

Point*  MultinomialGradFinv(Point* eta)
{
double s=0.0;
int i;
Point * result=new Point(eta->d);

for(i=0;i<eta->d;i++)
	s+=( eta->coord[i] );

for(i=0;i<eta->d;i++)
	result->coord[i]=log(eta->coord[i]/(1.0-s));

return result;
}

class BregmanDivergence
{
public:
	virtual double F(Point* p)=0;
	virtual Point* gradF(Point* p)=0;
	virtual Point* gradFinv(Point* p)=0;

	BregmanDivergence(){}
	~BregmanDivergence(){}

	// Part common to all Bregman divergences
	double Divergence(Point* p, Point* q)
		{
			Point *pq;
			double r;
			pq=new Point(p->d);
			for(int i=0;i<p->d;i++) 
					pq->coord[i]=p->coord[i]-q->coord[i];
			r=F(p)-F(q)-InnerProduct(pq,gradF(q));
			delete pq;
			return r; }

	Point* GeodesicPoint(Point* p, Point* q, double l)
	{
	Point *result=new Point(p->d), *fresult;
	int i;
	Point* gradp, *gradq;

	gradp=gradF(p);
	gradq=gradF(q);

	for(i=0;i<p->d;i++)
		result->coord[i]=(1.0-l)*gradp->coord[i]+l*gradq->coord[i];

	delete gradp;
	delete gradq;
	
	fresult=gradFinv(result);
	
	delete result;

	return fresult;
	}


};


//
// Kullback-Leibler distance for multinomial distribution
// (a member of the exponential family)
class BDMultinomial: public BregmanDivergence
{public:

double F(Point* theta) 
{
double s=0.0;

for(int i=0;i<theta->d;i++)
	s+=( exp(theta->coord[i]) );

return log(1.0+s);
}

Point* gradF(Point* theta){ return MultinomialGradF(theta);}
Point* gradFinv(Point* eta){ return MultinomialGradFinv(eta);}
};
// End of the KL multinomial Bregman divergence class


// Convert d-dimensional distribution to (d-1)-dimensional multinomial
// (Lower by one dimension)
Point* DistributionToMultinomial(Point* p)
{
int i;
int d=p->d;
Point* result=new Point(d-1);

double den=p->coord[d-1];

for(i=0;i<d-1;i++)
	result->coord[i]=log(p->coord[i]/den);


return result;
}

// Convert (d-1)-dimensional multinomial to d-dimensional distribution
// (Augment by one dimension)
Point* MultinomialToDistribution(Point* p)
{
int i;
int d=p->d;
Point* result=new Point(d+1);
double s=0.0;

for(i=0;i<d;i++) s+=exp(p->coord[i]);

result->coord[d]=1.0/(1.0+s);

for(i=0;i<d;i++)
result->coord[i]=result->coord[d]*exp(p->coord[i]);

return result;
}

//
// Walk on the geodesic to find the 2-mean (non-uniform weight)
// (Nielsen and Nock, 2007)
//
Point* SBDGeodesicWalk(Set *theta)
{
int i,dm1=theta->array[0]->d;
Point *centroid;
Point *thetaR=new Point (dm1);
Point *thetaL=new Point(dm1);
Point *pthetageodesic=new Point(dm1);
double l, lmin,  lmax;
BregmanDivergence *BD=new BDMultinomial();

// STEP 1: convert to multinomial natural parameters
// This was done before since we received already natural parameters in an array

// STEP 2: Compute the right-type and left-type sided centroids
thetaR=GeneralizedMeans(theta,Identity,Identity);
thetaL=GeneralizedMeans(theta, MultinomialGradF ,MultinomialGradFinv);

// We do not need theta set anymore from now on, just the sided centroids
//thetaR->Save("nthetaR.data");thetaL->Save("nthetaL.data");

// STEP 3: Take a walk on the geodesic 
// (either in curved or primal space): Here in curved primal space

		lmin=0.0;lmax=1.0; 

		while( (lmax-lmin) >1.0e-6)
		{
		l=(lmin+lmax)/2.0;
		
		pthetageodesic=BD->GeodesicPoint(thetaR, thetaL, l);
		
		if (BD->Divergence(thetaR,pthetageodesic)>BD->Divergence(pthetageodesic,thetaL)) 
			lmax=l; 
			else 
			lmin=l;
		}

		l=(lmin+lmax)/2.0;
		pthetageodesic=BD->GeodesicPoint(thetaR, thetaL, l);

		//pthetageodesic->Save("nthetageodesic.data");
		
// STEP 4: Transform back multinomial natural (d-1)-dimensional parameters to d-dimensional distribution parameters		
		centroid=MultinomialToDistribution(pthetageodesic);

delete BD;
delete thetaL;
delete thetaR;
delete pthetageodesic;

return centroid;
}


//
// Main entry program for comparing Veldhuis' approach with the geodesic walk approach
//
int _tmain(int argc, _TCHAR* argv[])
{
	Set *set, *theta;
	Point *centroidCP, *centroidBD;
	char filename[256];
	int i,n=5000;
	

	cout<<"Compute the symmetrical Kullback-Leibler centroid of a set of probability distributions using two methods: (1) Veldhuis's convex programming (IEEE SPL'02) and the generic symmetrized Bregman divergence (Nielsen and Nock, 2007)."<<endl;
	cout<<"(c) 2007 All rights reserved Frank Nielsen (Frank.Nielsen@acm.org)."<<endl;
	cout.precision(20);

	set=new Set(n,DIM);
	theta=new Set(n,DIM-1);

	for(i=0;i<set->n;i++)
		theta->array[i]=DistributionToMultinomial(set->array[i]);

	/* // For reading from input point text files
	for(i=0;i<n;i++) {sprintf(filename,"%d.data",i);set->array[i]=new Point(filename);}
	*/
	
	startt1=GetTickCount();
	centroidCP=SKLConvexPrograming(set);
	stopt1=GetTickCount();
	dt1=(float)(stopt1-startt1);

	startt2=GetTickCount();
	centroidBD=SBDGeodesicWalk(theta);
	stopt2=GetTickCount();
	dt2=(float)(stopt2-startt2);


	cout<<"Centroid using SKL Veldhuis'method:"<<endl;
	cout<<"Jensen-Shannon information radius="<<InformationRadius(set,centroidCP)<<endl;

	
	cout<<"Centroid using SBD geodesic walk method:"<<endl;
	cout<<"Jensen-Shannon information radius="<<InformationRadius(set,centroidBD)<<endl;

	cout<<"Symmetric Kullback-Leibler divergence between the two centroids:\n"<<SymmetrizedKullbackLeibler(centroidCP,centroidBD)<<endl;
	
	cout<<"Running time: Veldhuis "<<dt1<<" versus geodesic walk "<<dt2<<endl;

	// Cleaning
	delete set;
	delete theta;
	delete centroidCP;
	delete centroidBD;
	
	cout<<"Press <RETURN> key to exit."<<endl; getchar();
	return 0;
}

