// SBD.cpp : A minimal implementation of two methods for computing symmetrical centroids: // // - The symmetrical Kullback-Leibler (SKL) centroid following: // "The Centroid of the Symmetrical Kullback-Leibler Distance" by Raymond VELDHUIS // IEEE signal processing letters, 9 (3). pp. 96-99 // // and // // - The centroid of symmetrized Bregman divergences (including the above SKL) following: // "On the Centroids of Symmetrized Bregman Divergences" by Frank NIELSEN and Richard NOCK // Sony CSL Technical Report, 2007 // // (c) March-November 2007 Frank NIELSEN (Frank.Nielsen@acm.org) // All rights reserved // Debugged and cross-checked on November 7th 2007 #include "stdafx.h" #include #include using namespace std; // Variables for timing DWORD startt1,stopt1,startt2,stopt2; double dt1,dt2; // Image histograms are stored as unit vector points #define DIM 256 // Ensure a non-zero random number in (0,1) so that // Kullback-Leibler divergence will be always bounded. inline double drand() { return 1.0e-5+(double)rand ()/(double)RAND_MAX ; } // // Basic point class // class Point { public: int d; // dimension double *coord; // array of coordinates // Set to zero void zero() { for(int i=0;icoord[i]=coord[i]/s;} return result; } double sum() { double s=0; for(int i=0;i0){ output<<"["<d;i++) r+=p->coord[i]*q->coord[i]; return r; } // The J-divergence, a particular symmetrized Bregman divergence in disguise double SymmetrizedKullbackLeibler(Point* p,Point* q) { double r=0.0; for(int i=0;id;i++) { r+= ( p->coord[i]*log(p->coord[i]/q->coord[i])+ q->coord[i]*log(q->coord[i]/p->coord[i]) ); } return 0.5*r; } // A class for storing point set class Set { public: int n; Point **array; // constructor Set(int nn, int dd) { n=nn; array=new Point*[n]; //cout<<"Created point array"<d); for(int i=0;id;i++) result->coord[i]=p->coord[i]; return result; } // Functions for the geometric mean Point* Log(Point* p) { Point *result=new Point(p->d); for(int i=0;id;i++) result->coord[i]=log(p->coord[i]); return result; } Point* Exp(Point* p) { Point *result=new Point(p->d); for(int i=0;id;i++) result->coord[i]=exp(p->coord[i]); return result; } // // Compute generalized means also called quasi-arithmetic or f-means // Point* GeneralizedMeans(Set* set, Point*(* FunctionGrad)(Point*), Point*(*FunctionGradinv)(Point*)) { int i,j; int d=set->array[0]->d; double w=(1.0/(double)(set->n)); Point* result=new Point(set->array[0]->d); Point* addp; result->zero(); for(i=0;in;i++) { addp=FunctionGrad(set->array[i]); // Centroid: uniform weight vector w (barycenters allow arbitrary unit vectors) for(j=0;jcoord[j]+= w*addp->coord[j]; delete addp; } return (*FunctionGradinv)(result); } // // The minimum average is the Jensen-Shannon difference, a Burbea-Rao divergence // double InformationRadius(Set* set, Point* centroid) { double r=0.0; for(int i=0;in;i++) r+=SymmetrizedKullbackLeibler(set->array[i],centroid); return r/(double)(set->n); } // // Veldhuis' symmetrical Kullback-Leibler centroid algorithm // IEEE Signal Processing Letters, 2002 // Proceeds by convex programming using a renormalization step // Point* SKLConvexPrograming(Set* set) { Point* arithmetic, *geometric, *normalizedgeometric, *sklcentroid; Point* x,*y; double c, sum; int k,d; d=set->array[0]->d; x=new Point(d); y=new Point(d); sklcentroid=new Point(d); // Initialization arithmetic=GeneralizedMeans(set, Identity, Identity); geometric=GeneralizedMeans(set, Log, Exp); normalizedgeometric=geometric->normalized(); c=-1; // Main loop for(int loop1=1;loop1<10;loop1++) { for(k=0;kcoord[k]=arithmetic->coord[k]/(normalizedgeometric->coord[k]*exp(c)); x->coord[k]=1.0; } // First inner loop: adjust x for(int loop2=1;loop2<5;loop2++) { for(k=0;kcoord[k]=x->coord[k]-(x->coord[k]*log(x->coord[k])-y->coord[k])/(1.0+log(x->coord[k])); } // Second inner loop: adjust parameter c for(int loop3=1;loop3<5;loop3++) { sum=0.0; for(k=0;kcoord[k]*normalizedgeometric->coord[k]*exp(c); c=c-(sum-1.0)/sum; } } // // Update the centroid // for(k=0;kcoord[k]=x->coord[k]*normalizedgeometric->coord[k]*exp(c); // Clean delete arithmetic; delete geometric; delete normalizedgeometric; delete x; delete y; return sklcentroid; } // // Second method: Symmetrized Bregman divergences // // Functions for handling natural parameters of multinomials (of the exponential family) Point* MultinomialGradF(Point* theta) { double s=0.0; Point * result=new Point(theta->d); for(int i=0;id;i++) s+=( exp(theta->coord[i]) ); for(i=0;id;i++) result->coord[i]=exp(theta->coord[i])/(1.0+s); return result; } Point* MultinomialGradFinv(Point* eta) { double s=0.0; int i; Point * result=new Point(eta->d); for(i=0;id;i++) s+=( eta->coord[i] ); for(i=0;id;i++) result->coord[i]=log(eta->coord[i]/(1.0-s)); return result; } class BregmanDivergence { public: virtual double F(Point* p)=0; virtual Point* gradF(Point* p)=0; virtual Point* gradFinv(Point* p)=0; BregmanDivergence(){} ~BregmanDivergence(){} // Part common to all Bregman divergences double Divergence(Point* p, Point* q) { Point *pq; double r; pq=new Point(p->d); for(int i=0;id;i++) pq->coord[i]=p->coord[i]-q->coord[i]; r=F(p)-F(q)-InnerProduct(pq,gradF(q)); delete pq; return r; } Point* GeodesicPoint(Point* p, Point* q, double l) { Point *result=new Point(p->d), *fresult; int i; Point* gradp, *gradq; gradp=gradF(p); gradq=gradF(q); for(i=0;id;i++) result->coord[i]=(1.0-l)*gradp->coord[i]+l*gradq->coord[i]; delete gradp; delete gradq; fresult=gradFinv(result); delete result; return fresult; } }; // // Kullback-Leibler distance for multinomial distribution // (a member of the exponential family) class BDMultinomial: public BregmanDivergence {public: double F(Point* theta) { double s=0.0; for(int i=0;id;i++) s+=( exp(theta->coord[i]) ); return log(1.0+s); } Point* gradF(Point* theta){ return MultinomialGradF(theta);} Point* gradFinv(Point* eta){ return MultinomialGradFinv(eta);} }; // End of the KL multinomial Bregman divergence class // Convert d-dimensional distribution to (d-1)-dimensional multinomial // (Lower by one dimension) Point* DistributionToMultinomial(Point* p) { int i; int d=p->d; Point* result=new Point(d-1); double den=p->coord[d-1]; for(i=0;icoord[i]=log(p->coord[i]/den); return result; } // Convert (d-1)-dimensional multinomial to d-dimensional distribution // (Augment by one dimension) Point* MultinomialToDistribution(Point* p) { int i; int d=p->d; Point* result=new Point(d+1); double s=0.0; for(i=0;icoord[i]); result->coord[d]=1.0/(1.0+s); for(i=0;icoord[i]=result->coord[d]*exp(p->coord[i]); return result; } // // Walk on the geodesic to find the 2-mean (non-uniform weight) // (Nielsen and Nock, 2007) // Point* SBDGeodesicWalk(Set *theta) { int i,dm1=theta->array[0]->d; Point *centroid; Point *thetaR=new Point (dm1); Point *thetaL=new Point(dm1); Point *pthetageodesic=new Point(dm1); double l, lmin, lmax; BregmanDivergence *BD=new BDMultinomial(); // STEP 1: convert to multinomial natural parameters // This was done before since we received already natural parameters in an array // STEP 2: Compute the right-type and left-type sided centroids thetaR=GeneralizedMeans(theta,Identity,Identity); thetaL=GeneralizedMeans(theta, MultinomialGradF ,MultinomialGradFinv); // We do not need theta set anymore from now on, just the sided centroids //thetaR->Save("nthetaR.data");thetaL->Save("nthetaL.data"); // STEP 3: Take a walk on the geodesic // (either in curved or primal space): Here in curved primal space lmin=0.0;lmax=1.0; while( (lmax-lmin) >1.0e-6) { l=(lmin+lmax)/2.0; pthetageodesic=BD->GeodesicPoint(thetaR, thetaL, l); if (BD->Divergence(thetaR,pthetageodesic)>BD->Divergence(pthetageodesic,thetaL)) lmax=l; else lmin=l; } l=(lmin+lmax)/2.0; pthetageodesic=BD->GeodesicPoint(thetaR, thetaL, l); //pthetageodesic->Save("nthetageodesic.data"); // STEP 4: Transform back multinomial natural (d-1)-dimensional parameters to d-dimensional distribution parameters centroid=MultinomialToDistribution(pthetageodesic); delete BD; delete thetaL; delete thetaR; delete pthetageodesic; return centroid; } // // Main entry program for comparing Veldhuis' approach with the geodesic walk approach // int _tmain(int argc, _TCHAR* argv[]) { Set *set, *theta; Point *centroidCP, *centroidBD; char filename[256]; int i,n=5000; cout<<"Compute the symmetrical Kullback-Leibler centroid of a set of probability distributions using two methods: (1) Veldhuis's convex programming (IEEE SPL'02) and the generic symmetrized Bregman divergence (Nielsen and Nock, 2007)."<n;i++) theta->array[i]=DistributionToMultinomial(set->array[i]); /* // For reading from input point text files for(i=0;iarray[i]=new Point(filename);} */ startt1=GetTickCount(); centroidCP=SKLConvexPrograming(set); stopt1=GetTickCount(); dt1=(float)(stopt1-startt1); startt2=GetTickCount(); centroidBD=SBDGeodesicWalk(theta); stopt2=GetTickCount(); dt2=(float)(stopt2-startt2); cout<<"Centroid using SKL Veldhuis'method:"< key to exit."<