/*
    gaussian_sim_marginal

    Copyright (C) 2013 Douglas L. Theobald

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

    -/_|:|_|_\-
*/
/******************************************************************************
 *
 *  File:           gaussian_sim_marginal.c
 *
 *  Function:       
 *
 *  Author(s):      Douglas L. Theobald
 *
 *  Copyright:      Copyright (c) 2013 Douglas L. Theobald
 *                  All Rights Reserved.
 *
 *  Source:         Started anew.
 *
 *  Notes:          
 *
 *  Change History:
 *          2011_04_15_nnn    Started source
 *  
 *****************************************************************************/
// gcc -O3 -ffast-math -Wall -Werror -std=c99 -pedantic -o gaussian_sim_marginal -lgsl -lgslcblas gaussian_sim_marginal.c


#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#ifdef   __linux__
  #include <getopt.h>
#endif
#include <ctype.h>
#include <math.h>
#include <float.h>
#include <pthread.h>
#include <assert.h>
#include <sys/resource.h>
#include <time.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_integration.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_sf_psi.h>

/* ln(2*pi), assembled from the M_LN2 and M_LNPI math constants. */
#define LN2PI  (M_LN2 + M_LNPI)

/*
File-scope run configuration.  The functions visible in this part of the file
read these directly rather than taking them as arguments.
*/
double             burnin = 0.0;
int                iters = 1000000; /* number of samples; bound on the second index of x and on lnprior/lnlike */
double             nu = 1.0;
int                dim = 1; /* number of params */
int                hdim = 1; /* # hierarchical params */
int                ndata = 100; /* sample size n used in the log(ndata) terms of the estimators */
int                nd; /* sample size used by CalcLaplaceMetUni(); no explicit initializer here */
double             lambda_0 = 1.0;
unsigned long int  seed = 0;
int                expo_model = 0;
int                gauss_model = 0;
int                write_files = 0;
int                thrdnum = 1;
int                parallel = 0;
int                entropy_calc = 0;

/*
Shared work arrays.  All start out NULL; they are not allocated in this
part of the file.
*/
double            *pave = NULL; /* for use with CalcPCov and CalcPAve */
double            *y = NULL; /* posterior param average */
double            *h = NULL; /* hyperparameter */
double             yt, yt2, x2t;
double            *musim = NULL;
double            *lnpost = NULL;
double            *lnlike = NULL; /* per-sample log likelihood, indexed 0..iters-1 */
double            *lnprior = NULL; /* per-sample log prior, indexed 0..iters-1 */
double           **x = NULL; /* posterior sample */ /* indexed x[param][sample] */
double            *x2 = NULL;
double           **data = NULL; /* data */
double           **cov = NULL; /* dim x dim; filled by CalcPCov(), later overwritten in place by CalcLaplaceMet() */

/* Running summaries; several are printed by CalcLaplaceMet() and CalcLaplaceMetUni(). */
double             avelnlike = 0.0, avelnprior = 0.0, varlnpost = 0.0;
double             avelnprlk2 = 0.0, avelnpost = 0.0, avelnprlk = 0.0;


/* Defined later in the file; called by RandFillVec() before it exits on a bad method code. */
void
Usage(void);


/*
Print the first `size` entries of `vec` to stdout, one per line, in the form
" index [ value ]", followed by a blank line.  All output streams are flushed
before returning.
*/
void
VecPrint(double *vec, const int size)
{
    int             idx = 0;

    while (idx < size)
    {
        printf(" %4d [ % 14.8e ]\n", idx, vec[idx]);
        ++idx;
    }

    printf("\n");

    fflush(NULL);
}


/*
Print the lower triangle (diagonal included) of a size x size matrix to
stdout, one labelled row per line, then a footer line of column indices.
All output streams are flushed before returning.
*/
void
MatPrintLowerDiag(double **matrix, const int size)
{
    int             row, col;

    printf("\n\n");

    row = 0;
    while (row < size)
    {
        printf("%-2d: [", row);

        /* row `row` has row+1 entries in the lower triangle */
        col = 0;
        while (col <= row)
        {
            printf(" % 14.6f", matrix[row][col]);
            ++col;
        }

        printf(" ]\n");
        ++row;
    }

    /* footer: column indices aligned under the values */
    printf("     ");
    col = 0;
    while (col < size)
    {
        printf(" % 14d", col);
        ++col;
    }
    printf("\n");

    fflush(NULL);
}


/*
Print every entry of a size x size matrix to stdout, one labelled row per
line, then a footer line of column indices.  All output streams are flushed
before returning.
*/
void
MatPrint(double **matrix, const int size)
{
    int             row, col;

    printf("\n\n");

    row = 0;
    while (row < size)
    {
        printf("%-2d: [", row);

        col = 0;
        while (col < size)
        {
            printf(" % 14.6f", matrix[row][col]);
            ++col;
        }

        printf(" ]\n");
        ++row;
    }

    /* footer: column indices aligned under the values */
    printf("     ");
    col = 0;
    while (col < size)
    {
        printf(" % 14d", col);
        ++col;
    }
    printf("\n");

    fflush(NULL);
}


/*
Release a matrix laid out as a row-pointer array whose first entry points at
the single contiguous data buffer (the layout MatAlloc() produces), then set
the caller's pointer to NULL.  A NULL matrix is left untouched.
*/
void
MatDestroy(double ***matrix_ptr)
{
    double       **rows = *matrix_ptr;

    if (rows == NULL)
        return;

    /* rows[0] owns the whole data buffer; free(NULL) is a no-op */
    free(rows[0]);
    rows[0] = NULL;

    free(rows);
    *matrix_ptr = NULL;
}


/*
Allocate a zero-initialized rows x cols matrix of doubles.

The data lives in one contiguous buffer; the returned array of row pointers
indexes into it, so matrix[0] is also the address of the whole buffer
(row-major).  Release with MatDestroy().

All size arithmetic is done in size_t, with explicit overflow checks, so that
large shapes (e.g. a few thousand rows by 1e6 columns) cannot overflow int.
A zero-sized matrix is valid: matrix[0] always points at an allocated buffer,
which keeps MatDestroy() well defined for it.

On a negative dimension or any allocation failure an error is printed and the
program exits with EXIT_FAILURE; the function never returns NULL.
*/
double **
MatAlloc(const int rows, const int cols)
{
    const size_t    size_max = (size_t) -1;
    size_t          nrows, ncols, nelem, i;
    double        **matrix = NULL;
    double         *matspace = NULL;

    if (rows < 0 || cols < 0)
    {
        printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    nrows = (size_t) rows;
    ncols = (size_t) cols;

    /* refuse shapes whose byte count cannot be represented in size_t */
    if ((ncols != 0 && nrows > size_max / sizeof(double) / ncols)
        || nrows > size_max / sizeof(double *))
    {
        printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    /* always request at least one element so a zero-sized matrix does not
       depend on the implementation-defined result of calloc(0, ...) */
    nelem = nrows * ncols;
    matspace = calloc(nelem > 0 ? nelem : 1, sizeof(double));
    if (matspace == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate matrix space in MatAlloc(): (%d x %d)\n", rows, cols);
        exit(EXIT_FAILURE);
    }

    /* allocate room for the pointers to the rows (at least one, for matrix[0]) */
    matrix = malloc((nrows > 0 ? nrows : 1) * sizeof(double *));
    if (matrix == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate room for row pointers in MatAlloc(): (%d)\n", rows);
        free(matspace);
        exit(EXIT_FAILURE);
    }

    /*  now 'point' the pointers */
    matrix[0] = matspace;
    for (i = 1; i < nrows; ++i)
        matrix[i] = matspace + (i * ncols);

    return(matrix);
}


/*
Calculate eigenvalues of a square, symmetric, real matrix, using GSL.

cov   dim x dim matrix stored contiguously starting at cov[0] (the layout
      MatAlloc() produces).  It is NOT perturbed: the decomposition runs on
      a private copy.
eval  caller-allocated array of at least dim doubles; receives the
      eigenvalues.

NOTE: gsl_eigen_symm() returns the eigenvalues in no particular order and no
sort is applied here, so callers must not rely on any ordering.

Exits with EXIT_FAILURE if the working copy cannot be allocated.
*/
void
EigenvalsGSL(double **cov, const int dim, double *eval)
{
    const size_t    n = (size_t) dim;
    const size_t    nbytes = n * n * sizeof(double); /* size_t math: dim * dim can overflow int */
    double         *cov_cpy = NULL;

    cov_cpy = malloc(nbytes);
    if (cov_cpy == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate matrix copy in EigenvalsGSL(): (%d x %d)\n", dim, dim);
        exit(EXIT_FAILURE);
    }

    memcpy(cov_cpy, cov[0], nbytes);
    gsl_matrix_view m = gsl_matrix_view_array(cov_cpy, n, n);
    gsl_vector_view evalv = gsl_vector_view_array(eval, n);
    gsl_eigen_symm_workspace *w = gsl_eigen_symm_alloc(n);

    gsl_eigen_symm(&m.matrix, &evalv.vector, w);

    gsl_eigen_symm_free(w);
    free(cov_cpy);
}


/*
In-place variant of EigenvalsGSL(): the decomposition runs directly on the
storage at cov[0], so part of the input matrix **cov is destroyed (per the
GSL documentation for gsl_eigen_symm, the diagonal and lower triangle).
eval must already hold room for dim doubles.
*/
void
EigenvalsGSLDest(double **cov, const int dim, double *eval)
{
    gsl_matrix_view           mat_view = gsl_matrix_view_array(cov[0], dim, dim);
    gsl_vector_view           eval_view = gsl_vector_view_array(eval, dim);
    gsl_eigen_symm_workspace *work = gsl_eigen_symm_alloc(dim);

    gsl_eigen_symm(&mat_view.matrix, &eval_view.vector, work);

    gsl_eigen_symm_free(work);
}


/*
Cholesky-decompose the dim x dim matrix stored contiguously at mat[0],
in place, via gsl_linalg_cholesky_decomp().  The input is overwritten and
the GSL return status is not inspected.
*/
void
CholeskyGSLDest(double **mat, const int dim)
{
    gsl_matrix_view           mat_view = gsl_matrix_view_array(mat[0], dim, dim);

    gsl_linalg_cholesky_decomp(&mat_view.matrix);
}


/*
Fill vec[0..len-1] with independent draws from a standard-scale distribution
selected by randmeth, which may be either a numeric code or its letter alias:

     1 / 'n'  normal       (sigma = 1)
     2 / 'l'  logistic     (scale = 1)
     3 / 'L'  Laplacian    (width = 1)
     4 / 'C'  Cauchy       (scale = 1)
     5 / 'g'  gamma        (a = 1, b = 1)
    12 / 'e'  exponential  (mean = 1)

Any other code prints an error, calls Usage() and exits with EXIT_FAILURE.
The code is checked once per element, so nothing happens when len <= 0.
*/
static void
RandFillVec(double *vec, int len, int randmeth, const gsl_rng *r2)
{
    int             idx = 0;

    while (idx < len)
    {
        double      draw = 0.0;

        switch (randmeth)
        {
            case 1:
            case 'n': /* normal */
                draw = gsl_ran_gaussian(r2, 1.0);
                break;

            case 2:
            case 'l': /* logistic */
                draw = gsl_ran_logistic(r2, 1.0);
                break;

            case 3:
            case 'L': /* Laplacian */
                draw = gsl_ran_laplace(r2, 1.0);
                break;

            case 4:
            case 'C': /* Cauchy */
                draw = gsl_ran_cauchy(r2, 1.0);
                break;

            case 5:
            case 'g': /* gamma */
                draw = gsl_ran_gamma(r2, 1.0, 1.0);
                break;

            case 12:
            case 'e': /* exponential */
                draw = gsl_ran_exponential(r2, 1.0);
                break;

            default:
                printf("\n  ERROR888: Bad random param -R '%c' \n",
                       (char) randmeth);
                Usage();
                exit(EXIT_FAILURE);
        }

        vec[idx] = draw;
        ++idx;
    }
}


double
RandScale(double variance, int randmeth, double b)
{
    double          scale;

    switch(randmeth)
    {
        case 1:
        case 'n': /* normal */
            scale = sqrt(variance);
            break;
        case 2:
        case 'l': /* logistic */
            scale = sqrt(3.0 * variance) / M_PI;
            break;
        case 3:
        case 'L': /* Laplacian */
            scale = sqrt(variance / 2.0);
            break;
        case 4:
        case 'C': /* Cauchy */
            scale = 1;
            break;
        case 5:
        case 'g': /* gamma */
            scale = sqrt(variance / b);
            break;
//         case 6:
//         case 'W': /* Wald = inverse gaussian w/ 1 */
//             scale = 1.0 / variance;
//             break;
//         case 7:
//         case 'p': /* thirdOpoly */
//             scale = sqrt(variance);
//             break;
//         case 8:
//         case 'i': /* inverse gaussian w/ 1 */
//             a = 3.0;
//             scale = a*a*a / variance;
//             break;
//         case 9:
//         case 'E': /* EVD */
//             scale = sqrt(6.0 * variance) / M_PI;
//             break;
//         case 10:
//         case 'c': /* chi-squared */
//             scale = variance / 2.0;
//             break;
//         case 11:
//         case 'R': /* Rayleigh - same as Weibull w/2 */
//             scale = sqrt(variance/(2.0 - (M_PI / 2.0)));
//             break;
        case 12:
        case 'e': /* exponential */
            scale = sqrt(variance);
            break;
        default:
            scale = sqrt(variance);
    }

    return(scale);
}


/*
Simulate `iters` draws from a zero-mean multivariate Gaussian of dimension
`len` whose covariance matrix is itself generated at random, printing the
intermediate matrices, eigenvalues, log-determinant and analytic entropy
along the way.

vec    len x iters output, indexed vec[param][draw].  The draws are
       ACCUMULATED into vec with +=, so the caller must pass it zeroed
       (as MatAlloc() returns it).
len    dimension of the Gaussian.
iters  number of draws (this parameter shadows the file-scope `iters`).
r2     GSL random number generator.

Steps:
  1. random lower-triangular matrix T; T T^T gives a positive semi-definite
     "correlation" matrix;
  2. scale it by gamma-distributed values and add 1 to the diagonal to get
     the covariance matrix;
  3. Cholesky-factor the covariance and multiply the factor into standard
     normal draws.

The order of the random draws is part of the behavior: for a given seed the
output is reproducible only if that order is kept.

Exits with EXIT_FAILURE if a work array cannot be allocated.
*/
void
RandVec(double **vec, const int len, const int iters, const gsl_rng *r2)
{
    int            i, j, k;
    double       **covmat = MatAlloc(len, len);
    double       **cormat = MatAlloc(len, len);
    double       **tmpmat = MatAlloc(len, len);
    double        *diag = malloc(len * sizeof(double));
    double        *eval = malloc(len * sizeof(double));
    double       **tmpvec = MatAlloc(len, iters);
    double         lndet, entropy;

    if (diag == NULL || eval == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate work vectors in RandVec(): (%d)\n", len);
        exit(EXIT_FAILURE);
    }

    /* strict lower triangle first, then the diagonal (keep this draw order) */
    for (i = 0; i < len; ++i)
        for (j = 0; j < i; ++j)
            tmpmat[i][j] = gsl_ran_flat(r2, -1.0, 1.0);

    for (i = 0; i < len; ++i)
        tmpmat[i][i] = gsl_ran_flat(r2, 0.0, 1.0);

    MatPrintLowerDiag(tmpmat, len);

    /* cormat = tmpmat * tmpmat^T; cormat starts zeroed from MatAlloc() */
    for (i = 0; i < len; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k < len; ++k)
                cormat[i][k] += tmpmat[i][j] * tmpmat[k][j];

    printf("\n\"correlation matrix\":");
    MatPrintLowerDiag(cormat, len);

    for (i = 0; i < len; ++i)
        diag[i] = gsl_ran_gamma(r2, 2.0, 10.0);

    for (i = 0; i < len; ++i)
        for (j = 0; j < len; ++j)
            covmat[i][j] = cormat[i][j] * sqrt(diag[i] * diag[j]);

    /* bump the diagonal by 1 */
    for (i = 0; i < len; ++i)
        covmat[i][i] += 1.0;

    printf("\ncovariance matrix:");
    MatPrintLowerDiag(covmat, len);

    /* from here on diag holds the actual variances */
    for (i = 0; i < len; ++i)
        diag[i] = covmat[i][i];

    printf("\nvariances:\n");

    for (i = 0; i < len; ++i)
        printf("%-3d %f\n", i, diag[i]);

    /* renormalize: cormat only had unit diagonal by accident, if at all */
    for (i = 0; i < len; ++i)
        for (j = 0; j < len; ++j)
            cormat[i][j] = covmat[i][j] / sqrt(diag[i] * diag[j]);

    printf("\ntrue correlation matrix:");
    MatPrintLowerDiag(cormat, len);

    EigenvalsGSL(covmat, len, eval);

    printf("\neigenvalues:\n");

    for (i = 0; i < len; ++i)
        printf("%-3d %f\n", i, eval[i]);

    lndet = 0.0;
    for(i = 0; i < len; ++i)
        lndet += log(eval[i]);

    printf("logdet: %f\n", lndet);

    /* analytic differential entropy of a len-dimensional Gaussian */
    entropy = 0.5 * len * log(2.0 * M_PI * M_E) + 0.5 * lndet;
    printf("\nentropy:    %14.3f", entropy);

    CholeskyGSLDest(covmat, len);
    printf("\nCholesky lower diagonal matrix:");
    MatPrintLowerDiag(covmat, len);

    fflush(NULL);

    for (i = 0; i < len; ++i)
        RandFillVec(tmpvec[i], iters, 1, r2);

    /* vec = L * z.  Only the lower triangle of covmat (the Cholesky factor L)
       is read: GSL does not zero the upper triangle, so it must be skipped
       rather than assumed to be zero. */
    for (i = 0; i < iters; ++i)
        for (j = 0; j < len; ++j)
            for (k = 0; k <= j; ++k)
                vec[j][i] += covmat[j][k] * tmpvec[k][i];

    MatDestroy(&tmpvec);
    MatDestroy(&tmpmat);
    MatDestroy(&covmat);
    MatDestroy(&cormat);
    free(diag);
    free(eval);
}


/* 
Calculate harmonic mean estimator, which should never be used, but we determine it for fun
and to see how bad it actually is. 
As boni, we get the log arithmetic mean likelihood and log geometric mean likelihood. 

ll     array of len log-likelihoods
len    number of samples

Prints a table of summaries (several of them use the file-scope `ndata`) and
returns the log harmonic mean likelihood.  With len <= 0 a warning is printed
and NAN is returned.

Both means are computed with a shifted log-sum-exp so that no exponential can
overflow and the dominant term is always exactly 1:

    log arithmetic mean = max + log( sum_i exp(ll_i - max) ) - log(len)
    log harmonic mean   = min - log( sum_i exp(min - ll_i) ) + log(len)

Shifting by the mean instead lets exp() underflow to 0 for samples far below
the mean, which would turn their reciprocal into +inf in the harmonic sum.
*/
double
CalcHarmonicMean(const double *ll, const int len)
{
    double         maxll, minll, amsum, hmsum, amlik, harm_mean, arith_mean, var, tmp;
    int            i;

    if (len <= 0)
    {
        printf("\n WARNING: no samples passed to CalcHarmonicMean()\n");
        fflush(NULL);
        return(NAN);
    }

    /* one pass: extremes for the shifts, and the mean log-likelihood
       (= log geometric mean likelihood) */
    maxll = minll = ll[0];
    amlik = 0.0;
    for (i = 0; i < len; ++i)
    {
        if (ll[i] > maxll)
            maxll = ll[i];

        if (ll[i] < minll)
            minll = ll[i];

        amlik += ll[i];
    }

    amlik /= len;

    /* every term below lies in [0, 1]; underflow to 0 is harmless here */
    amsum = hmsum = 0.0;
    for (i = 0; i < len; ++i)
    {
        amsum += exp(ll[i] - maxll);
        hmsum += exp(minll - ll[i]);
    }

    arith_mean = maxll + log(amsum / len);
    harm_mean = minll - log(hmsum) + log(len);

    /* population variance of the log-likelihoods */
    var = 0.0;
    for (i = 0; i < len; ++i)
    {
        tmp = ll[i] - amlik;
        var += tmp*tmp;
    }

    var /= len;

    printf("\n%-22s% 14d", "samples:", len);

    printf("\n%-22s% 16.4f", "log arithmetic mean:", arith_mean);
    printf("\n%-22s% 16.4f", "log geometric mean:", amlik);
    printf("\n%-22s% 16.4f", "log harmonic mean:", harm_mean);
    printf("\n%-22s% 16.4f", "variance of log like:", var);
    printf("\n%-22s% 16.4f", "log normal estimate:", amlik - 0.5 * var);
    printf("\n%-22s% 16.4f", "DIC:", amlik - var);
    printf("\n%-22s% 16.4f", "BICM_20 estimate:", amlik - var * (log(ndata) - 1.0));
    printf("\n%-22s% 16.4f", "BICM_DLT estimate:", amlik - var * log(ndata) + var * (ndata-1.0)/ndata);
    printf("\n");
    fflush(NULL);

    return(harm_mean);
}


/*
Arithmetic mean of the first `dim` entries of `data`, summed front to back.
No guard for dim == 0: the result is then 0.0/0.0.
*/
double
average(const double *data, const int dim)
{
    double          total = 0.0;
    int             k;

    for (k = 0; k < dim; ++k)
        total += data[k];

    return(total / (double) dim);
}


/*
Population variance (divisor dim, not dim - 1) of the first `dim` entries of
`data` about the supplied `mean`.  No guard for dim == 0.
*/
double
variance(const double *data, const int dim, const double mean)
{
    double          sumsq = 0.0;
    int             k;

    for (k = 0; k < dim; ++k)
    {
        const double dev = data[k] - mean;

        sumsq += (dev * dev);
    }

    return(sumsq / dim);
}


/*
Calculate the bias in the entropy estimate due to deviation from Normality.
Based on an Edgeworth expansion of a PDF in terms of its cumulants (moments).
The bias term is subtracted from the usual multivariate Gaussian
entropy:

0.5 * d * log(2.0 * M_PI * M_E) + 0.5 * lndet

where lndet is the log of the determinant of the d*d covariance matrix.

Multivariate third order corrections (using the skewness) come from Van Hulle 2005:

See:

Marc M. Van Hulle (2005)
"Multivariate Edgeworth-based entropy estimation."
2005 IEEE Workshop on Machine Learning for Signal Processing,
Conference Proceedings
28-28 Sept. 2005
pp 311 - 316

or

Marc M. Van Hulle (2005)
"Edgeworth Approximation of Multivariate Differential Entropy"
Neural Computation 17, 1903–1910

See equation 2.2.

The fourth order corrections (kurtosis terms) are univariate only;
they don't account for cross-kurtosis between dimensions.
Fourth order corrections are from Comon 1994:

Comon, P. (1994)
"Independent component analysis, a new concept?"
Signal processing 36, 287–314.

Amari 1996 also has similar 4th order corrections, but they seem to be
wrong (an implementation of their eqns 7 & 8 was tried here and dropped):

Amari, S.-I., Cichocki, A. and Yang, H. H. (1996)
"A new learning algorithm for blind signal separation."
Advances in neural information processing systems 8
Eds. D. Touretzky, M. Mozer, and M. Hasselmo.
MIT Press, Cambridge.
757–763 (1996).

Arguments:
    vec   dim x len samples, indexed vec[param][sample]; not modified
    dim   number of parameters
    len   number of samples; moments are divided by len - 1, so len must be
          at least 2 for the result to be meaningful

Prints the eigenvalues of the sample correlation matrix, the individual
Edgeworth terms and several entropy variants, and returns the fourth-order
corrected entropy (Gaussian entropy - bias + ln(scale)).

Exits with EXIT_FAILURE if a work array cannot be allocated.
*/
double
CalcEdgeworthVanHulleEntropy(double **vec, int dim, int len)
{
    int            i, j, k, m;
    double        *ave = NULL;
    double        *std = NULL;
    double        *eval = NULL;
    double       **dif = MatAlloc(dim,len);
    double         term1, term2, term3;
    double         term4, term5, term6;
    double         t3, t4;
    double         kappa_iii, kappa_iij, kappa_ijk;
    double         kappa_iiii;
    double         entropy, bias, lnscale, lndet, sum, var;
    double       **cor = MatAlloc(dim,dim);
    double         invlen = 1.0/(len-1);


    ave  = malloc(dim * sizeof(double));
    std  = malloc(dim * sizeof(double));
    eval = malloc(dim * sizeof(double));

    if (ave == NULL || std == NULL || eval == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate work vectors in CalcEdgeworthVanHulleEntropy(): (%d)\n", dim);
        exit(EXIT_FAILURE);
    }

    /* First, normalize data vector to 0 mean, unit 1 variance */
    for (i = 0; i < dim; ++i)
        ave[i] = average(vec[i], len);

    for (i = 0; i < dim; ++i)
        for (j = 0; j < len; ++j)
            dif[i][j] = vec[i][j] - ave[i];

    for (i = 0; i < dim; ++i)
    {
        var = 0.0;
        for (j = 0; j < len; ++j)
            var += dif[i][j] * dif[i][j];
        
        std[i] = sqrt(var * invlen);
    }

    /* Save the determinant of the scale transformation */
    lnscale = 0.0;
    for (i = 0; i < dim; ++i)
        lnscale += log(std[i]);

    /* rescale centered data; from here on std[] holds 1/std */
    for (i = 0; i < dim; ++i)
        std[i] = 1.0 / std[i];

    for (i = 0; i < dim; ++i)
        for (j = 0; j < len; ++j)
            dif[i][j] *= std[i];

    /* Calculate the covariance matrix of transformed data (= correlation matrix) */
    for (i = 0; i < dim; ++i)
    {
        for (j = 0; j <= i; ++j)
        {
            sum = 0.0;
            for (k = 0; k < len; ++k)
                sum += dif[i][k] * dif[j][k];

            cor[i][j] = cor[j][i] = sum * invlen;
        }
    }

    EigenvalsGSL(cor, dim, eval);

    VecPrint(eval, dim);

    /* log-determinant from the eigenvalues, skipping numerically zero ones */
    lndet = 0.0;
    for (i = 0; i < dim; i++)
    {
        if (isgreater(eval[i], DBL_EPSILON))
        {
            lndet += log(eval[i]);
        }
        else
        {
            /* report the threshold that is actually tested */
            printf("\n WARNING: excluding eigenvalue %d from determinant calculation", i);
            printf("\n WARNING: eigenvalue[%d] = %g < %g", i, eval[i], DBL_EPSILON);
        }
    }

    /* univariate cumulants: skewness and excess kurtosis per dimension */
    term1 = 0.0;
    term4 = 0.0;
    term5 = 0.0;
    term6 = 0.0;
    for (i = 0; i < dim; ++i)
    {
        kappa_iii = 0.0;
        kappa_iiii = 0.0;
        for (j = 0; j < len; ++j)
        {
            t3 = dif[i][j] * dif[i][j] * dif[i][j];
            kappa_iii += t3; /* skewness */
            kappa_iiii += t3 * dif[i][j]; /* kurtosis */
        }

        kappa_iii *= invlen;
        kappa_iiii *= invlen;
        kappa_iiii -= 3.0;

        t3 = kappa_iii * kappa_iii;
        t4 = kappa_iiii * kappa_iiii;
        term1 += t3;
        term4 += t4;
        term5 += t3*t3;
        term6 += t3 * kappa_iiii;
    }

    /* cross-skewness kappa_{i,i,j} over all ordered pairs i != j */
    term2 = 0.0;
    for (i = 0; i < dim; ++i)
    {
        for (j = 0; j < dim; ++j)
        {
            if (i != j)
            {
                kappa_iij = 0.0;
                for (k = 0; k < len; ++k)
                    kappa_iij += dif[i][k] * dif[i][k] * dif[j][k];

                kappa_iij *= invlen;

                term2 += kappa_iij * kappa_iij;
            }
        }
    }

    /* cross-skewness kappa_{i,j,k} over unordered triples k < j < i */
    term3 = 0.0;
    for (i = 0; i < dim; ++i)
    {
        for (j = 0; j < i; ++j)
        {
            for (k = 0; k < j; ++k)
            {
                kappa_ijk = 0.0;
                for (m = 0; m < len; ++m)
                    kappa_ijk += dif[i][m] * dif[j][m] * dif[k][m];

                kappa_ijk *= invlen;

                term3 += kappa_ijk * kappa_ijk;
            }
        }
    }

    /* There are d \kappa_{i,i,i} terms, 2 {d \choose 2} \kappa_{i,i,j} terms, 
       and {d \choose 3} \kappa_{i,j,k} terms.
       gsl_sf_choose (unsigned int n, unsigned int m) */

    /* The following is based on Comon, P. (1994) Signal processing 36, 287–314.
       See eqn 3.4 (Theorem 14).
       The similar equations (7 & 8) in Amari, Cichocki, and Yang (1996) seem to be wrong.  */

    bias = (term1 + 3.0 * term2 + term3 / 6.0) / 12.0 + term4/48.0 + 7.0*term5/48.0 - term6/8.0;

    printf("\nEdgeworth term1: %g", term1/ 12.0);
    printf("\nEdgeworth term2: %g", 3.0*term2/ 12.0);
    printf("\nEdgeworth term3: %g", term3/(6.0*12.0));
    printf("\nEdgeworth term4: %g", +term4/48.0);
    printf("\nEdgeworth term5: %g", +7.0*term5/48.0);
    printf("\nEdgeworth term6: %g\n", - term6/8.0);

    printf("\nln(det):           %14.3f", lndet);

    entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet;
    
    printf("\nwhite entropy:     %14.3f", entropy);
    printf("\nbias:              %14.3f", bias);
    printf("\nln(scale):         %14.3f", lnscale);

    printf("\nNaive N-entropy:   %14.3f", entropy + lnscale);

    printf("\nEdgeworth entropy: %14.3f", entropy - term1/12.0 + lnscale);
    printf("\nEdgeworth entropy (4th order): %14.3f", entropy - bias + lnscale);
    printf("\n\n");
    
    entropy = entropy - bias + lnscale;

    MatDestroy(&dif);
    MatDestroy(&cor);
    free(eval);
    free(std);
    free(ave);

    return(entropy);
}


/*
Compute the per-parameter mean of the samples in the file-scope x
(indexed x[param][sample]) over `iters` samples, storing the `dim`
results in the file-scope pave.
*/
void
CalcPAve(void)
{
    int            param, samp;

    for (param = 0; param < dim; ++param)
    {
        pave[param] = 0.0;

        /* samples are summed in index order */
        for (samp = 0; samp < iters; ++samp)
            pave[param] += x[param][samp];

        pave[param] /= iters;
    }
}


void
CalcPCov(void)
{
    int            i, j, k;
    double         tmpi, tmpj, sum;
    double         inviters = 1.0/(iters-1.0);

    //printf("\n%4d %g", iters, inviters);

    for (i = 0; i < dim; ++i)
    {
        for (j = 0; j <= i; ++j)
        {
            sum = 0.0;
            for (k = 0; k < iters; ++k)
            {
                tmpi = x[i][k] - pave[i];
                tmpj = x[j][k] - pave[j];
                sum += tmpi * tmpj;
                //printf("\n%4d %4d %4d: %16.8f %16.8f", i, j, k, tmpi, tmpj);
            }

            cov[i][j] = cov[j][i] = sum * inviters;
            //printf("\n%4d %4d: %16.8f %16.8f %16.8f", i, j, cov[i][j], sum*inviters, sum);
        }
    }
}


/* 
The Laplace-Metropolis estimator for calculating the marginal likelihood
from metropolis samples from the posterior distribution.

Steven M. Lewis, Adrian E. Raftery (1997)
"Estimating Bayes Factors via Posterior Simulation with the Laplace-Metropolis Estimator."
Journal of the American Statistical Association, 92(438):648-655

Using equation 4, esp. see pp 649-650, first method to estimate \theta*.

IME, this is extremely accurate (using Gold Standard as a reference).

This function calls CalcPAve() and CalcPCov() itself, so the caller only has
to provide the file-scope inputs: x, lnprior, lnlike, pave, cov (allocated),
iters, dim, ndata, and the avelnprior / avelnlike / varlnpost summaries.

Side effects: prints a report to stdout, and leaves the file-scope cov holding
the parameter CORRELATION matrix (it is normalized in place after the
determinant has been computed).

Returns the Laplace-Metropolis estimate of the log marginal likelihood.
Exits with EXIT_FAILURE if the eigenvalue array cannot be allocated.
*/
double
CalcLaplaceMet(void)
{
    int            i, j;
    int            maxind;
    double         maxpost, lndet, lapmet, expmet, lnh, entropy, lnfish, edge_entropy;
    double        *eval = NULL;

    printf("Calculating Laplace approximation ...\n");
    fflush(NULL);

    CalcPAve();
    CalcPCov();

    for (i = 0; i < dim; ++i)
        printf("\nave p[%3d]:% 16.4f (+/- %16.4f)", i, pave[i], sqrt(cov[i][i]));

    printf ("\n\nParameter covariance matrix (estimate of minus inverse Hessian):");
    MatPrint(cov, dim);

    ////////////////////////////////////////////////////////////////////////////////////////////////
    /* log-determinant of the covariance from its eigenvalues */
    eval = calloc(dim, sizeof(double));
    if (eval == NULL)
    {
        perror("\n ERROR");
        printf("\n ERROR: Failure to allocate eigenvalues in CalcLaplaceMet(): (%d)\n", dim);
        exit(EXIT_FAILURE);
    }

    EigenvalsGSL(cov, dim, eval);

    lndet = lnfish = 0.0;
    for (i = 0; i < dim; i++)
    {
        if (isgreater(eval[i], DBL_EPSILON))
        {
            lndet += log(eval[i]);
            lnfish -= log(ndata * eval[i]);
        }
        else
        {
            /* report the threshold that is actually tested */
            printf("\n WARNING: excluding eigenvalue %d from determinant calculation", i);
            printf("\n WARNING: eigenvalue[%d] = %g < %g", i, eval[i], DBL_EPSILON);
        }
    }

    printf("\nln(FI):   %14.3f", lnfish);
    printf("\nln(det):  %14.3f", lndet);
    printf("\n-d ln(n): %14.3f", -dim * log(ndata));
    printf("\ndet:      %g\n", exp(lndet));
    for (i = 0; i < dim; i++)
        printf ("\neigenvalue[%d] = %g", i, eval[i]);
    printf ("\n");
    fflush(NULL);

    free(eval);

    ////////////////////////////////////////////////////////////////////////////////////////////////
    /* Convert cov to a correlation matrix in place.
       The off-diagonals are normalized FIRST, while the diagonal still holds
       the variances.  Normalizing a diagonal entry before the rows below it
       have been processed would replace that variance with 1 and leave those
       rows divided by only one of the two standard deviations. */
    for (i = 0; i < dim; ++i)
    {
        for (j = 0; j < i; ++j)
        {
            if (cov[i][j] == 0.0)
                cov[i][j] = cov[j][i] = 0.0;
            else
                cov[i][j] = cov[j][i] = cov[i][j] / sqrt(cov[i][i] * cov[j][j]);
        }
    }

    /* now the diagonal: 1 for any non-zero variance, 0 stays 0 */
    for (i = 0; i < dim; ++i)
    {
        if (cov[i][i] != 0.0)
            cov[i][i] = cov[i][i] / sqrt(cov[i][i] * cov[i][i]);
    }

    printf ("\nParameter correlation matrix:");
    MatPrintLowerDiag(cov, dim);

    ////////////////////////////////////////////////////////////////////////////////////////////////
    /* Find the parameters with the maximum posterior prob */
    maxpost = -DBL_MAX;
    maxind = 0;
    for (i = 0; i < iters; ++i)
    {
        lnh = lnprior[i] + lnlike[i];
        if (maxpost < lnh)
        {
            maxpost = lnh;
            maxind = i;
        }
    }

    printf("\n%-25s% 16.4f", "Max log posterior - p(D):", maxpost);
    printf("\n%-25s% 16.4f", "Max log prior:", lnprior[maxind]);
    printf("\n%-25s% 16.4f", "Max log likelihood:", lnlike[maxind]);

    /* Laplace approximation: ln h(theta*) + (d/2) ln(2 pi) + (1/2) ln|Sigma| */
    lapmet = maxpost + 0.5 * lndet + 0.5 * dim * log(2.0 * M_PI);

    printf("\n\nLog marginal likelihood ln p(x):");
    printf("\n%-30s% 16.4f\n\n", "Laplace-Metropolis:", lapmet);

    printf("Laplace approximation done ...\n");
    fflush(NULL);

    /* entropy of a Gaussian with the estimated covariance */
    entropy = 0.5 * dim * log(2.0 * M_PI * M_E) + 0.5 * lndet;

    printf("Calculating Edgeworth entropy approximation ...\n");
    fflush(NULL);

    edge_entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters);

    expmet = avelnprior + avelnlike + edge_entropy;

    printf("\n%-30s% 16.4f", "Posterior N-entropy:", entropy);
    printf("\n%-30s% 16.4f", "Edgeworth entropy:", edge_entropy);
    printf("\n%-30s% 16.4f", "Expected-Metropolis:", expmet);
    printf("\n%-30s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-30s% 16.4f", "DLT-Metropolis:", avelnprior + avelnlike - varlnpost * (log(ndata) - log(2.0 * M_PI * M_E)));
    printf("\n%-30s% 16.4f\n", "DLT-Metropolis, no prior:",  avelnlike - varlnpost * (log(ndata) - log(2.0 * M_PI * M_E)));
    printf("\n%-30s% 16.4f", "Ave lnPrior:", avelnprior);
    printf("\n%-30s% 16.4f\n", "Ave lnLike:", avelnlike);
    fflush(NULL);

    return(lapmet);
}


/*
Univariate counterpart of CalcLaplaceMet(): Laplace-Metropolis estimate of
the log marginal likelihood from the single-parameter sample x[0].

Reads the file-scope x, lnprior, lnlike, iters, nd, and the avelnprior /
avelnlike / varlnpost summaries; prints a report to stdout.

Differences from CalcLaplaceMet() that are visible in the code:
  - the variance comes from variance(), i.e. divisor iters rather than
    iters - 1;
  - the sample size is `nd`, not `ndata`;
  - "Expected-Metropolis" is built from the Gaussian entropy, not from the
    Edgeworth entropy.  NOTE(review): CalcLaplaceMet() uses the Edgeworth
    entropy here; confirm which one is intended.

Returns the Laplace-Metropolis estimate of the log marginal likelihood.
*/
double
CalcLaplaceMetUni(void)
{
    int            i;
    int            maxind;
    double         maxpost, lndet, lapmet, expmet, lnh, entropy, lnfish, edge_entropy;
    double         ave, var;

    printf("Calculating Laplace approximation ...\n");
    fflush(NULL);

    ave = average(x[0], iters);
    var = variance(x[0], iters, ave);

    /* in one dimension the determinant is just the variance */
    lndet = log(var);
    lnfish = -log(nd * var);

    ////////////////////////////////////////////////////////////////////////////////////////////////
    printf("\nln(FI):   %14.3f", lnfish);
    printf("\nln(det):  %14.3f", lndet);
    printf("\n-d ln(n): %14.3f", -log(nd));
    printf("\ndet:      %g\n", exp(lndet));
    fflush(NULL);

    ////////////////////////////////////////////////////////////////////////////////////////////////
    /* Find the parameters with the maximum posterior prob */
    maxpost = -DBL_MAX;
    maxind = 0;
    for (i = 0; i < iters; ++i)
    {
        lnh = lnprior[i] + lnlike[i];
        if (maxpost < lnh)
        {
            maxpost = lnh;
            maxind = i;
        }
    }

    printf("\n%-25s% 16.4f", "Max log posterior - p(D):", maxpost);
    printf("\n%-25s% 16.4f", "Max log prior:", lnprior[maxind]);
    printf("\n%-25s% 16.4f", "Max log likelihood:", lnlike[maxind]);

    /* Laplace approximation with d = 1 */
    lapmet = maxpost + 0.5 * lndet + 0.5 * log(2.0 * M_PI);

    printf("\n\nLog marginal likelihood ln p(x):");
    printf("\n%-30s% 16.4f\n\n", "Laplace-Metropolis:", lapmet);

    printf("Laplace approximation done ...\n");
    fflush(NULL);

    /* entropy of a univariate Gaussian with the estimated variance */
    entropy = 0.5 * log(2.0 * M_PI * M_E) + 0.5 * lndet;
    
    expmet = avelnprior + avelnlike + entropy;

    printf("Calculating Edgeworth entropy approximation ...\n");
    fflush(NULL);

    edge_entropy = CalcEdgeworthVanHulleEntropy(x, 1, iters);

    printf("\n%-30s% 16.4f", "Posterior N-entropy:", entropy);
    printf("\n%-30s% 16.4f", "Edgeworth entropy:", edge_entropy);
    printf("\n%-30s% 16.4f", "Expected-Metropolis:", expmet);
    printf("\n%-30s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-30s% 16.4f", "DLT-Metropolis:", avelnprior + avelnlike - varlnpost * (log(nd) - log(2.0 * M_PI * M_E)));
    printf("\n%-30s% 16.4f\n", "DLT-Metropolis, no prior:",  avelnlike - varlnpost * (log(nd) - log(2.0 * M_PI * M_E)));
    printf("\n%-30s% 16.4f", "Ave lnPrior:", avelnprior);
    printf("\n%-30s% 16.4f\n", "Ave lnLike:", avelnlike);
    fflush(NULL);

    return(lapmet);
}


/*
The simple Gaussian model described on page 203 of

Lartillot N, Philippe H. (2006)
"Computing Bayes factors using thermodynamic integration."
Syst Biol. 55(2):195-207.

The real data is all zeros (a "data-free" likelihood).  

It appears that they have forgotten the normalization constants.
*/
/*
 * SimGaussLP
 *
 * For each of `iters` samples and each of `dim` coordinates, draws a value
 * from a zero-mean Gaussian with standard deviation sqrt(nu/(1+nu)) and
 * accumulates, per sample, the log densities of that value under
 *   - a zero-mean Gaussian with sd sqrt(nu)          -> lnprior[]
 *   - a zero-mean Gaussian with sd 1                 -> lnlike[]
 *   - the sampling Gaussian itself                   -> lnpost[]
 * The draws are stored in x[coordinate][sample].
 *
 * Afterwards the global averages avelnprior, avelnlike, avelnprlk2 and the
 * variance varlnpost of (lnprior + lnlike) are computed and printed.  Note
 * that avelnpost is set to avelnprior + avelnlike here, not to the mean of
 * lnpost[].
 *
 * nu  variance parameter (this parameter shadows the file-level `nu`)
 * r2  GSL random number generator used for every draw
 */
void
SimGaussLP(const double nu, const gsl_rng *r2)
{
    const double    post_sd = sqrt(nu / (1.0 + nu));
    const double    prior_sd = sqrt(nu);
    double          draw, joint, dev;
    int             it, k;

    avelnprior = 0.0;
    avelnlike = 0.0;
    avelnprlk2 = 0.0;

    for (it = 0; it < iters; ++it)
    {
        lnlike[it] = 0.0;
        lnpost[it] = 0.0;
        lnprior[it] = 0.0;

        for (k = 0; k < dim; ++k)
        {
            draw = gsl_ran_gaussian(r2, post_sd);

            lnprior[it] += log(gsl_ran_gaussian_pdf(draw, prior_sd));
            lnlike[it]  += log(gsl_ran_gaussian_pdf(draw, 1.0));
            lnpost[it]  += log(gsl_ran_gaussian_pdf(draw, post_sd));

            x[k][it] = draw;
        }

        joint = lnprior[it] + lnlike[it];

        avelnprior += lnprior[it];
        avelnlike  += lnlike[it];
        avelnprlk2 += joint * joint;
    }

    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    /* second pass: variance of (lnprior + lnlike) about its mean */
    varlnpost = 0.0;
    for (it = 0; it < iters; ++it)
    {
        dev = lnprior[it] + lnlike[it] - avelnpost;
        varlnpost += dev * dev;
    }
    varlnpost /= iters;

    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "avelnlike/n:", avelnlike/ndata);
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "avelnprlk2:", avelnprlk2);
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n\n");
}


double
normal_lnpdf(const double x, const double mean, const double var)
{
    double          p;

    p = (-0.5 * log(2.0 * M_PI * var)) - ((x - mean)*(x - mean) / (2.0 * var));

    return (p);
}


/*
 * Work packet handed to each simulation thread (see sim_gauss_pth and
 * sim_expo_pth, which fill x[idim][0 .. len-1]).
 */
typedef struct
{
    double        **x;      /* table of rows; the thread writes row x[idim] */
    int             idim;   /* index of the row this thread is responsible for */
    int             len;    /* number of entries to generate in that row */
    int             start;  /* not referenced by the thread routines in this part of the file */
    int             end;    /* not referenced by the thread routines in this part of the file */
} GibbsData;


static void
*sim_gauss_pth(void *gibbsdata_ptr)
{
    GibbsData     *gibbsdata = (GibbsData *) gibbsdata_ptr;
    int            i;
    const int      idim = (const int) gibbsdata->idim;
    double       **x = gibbsdata->x;
    double         tmpmu;

    const gsl_rng_type    *T = NULL;
    gsl_rng               *r2 = NULL;
    unsigned long int      seed;

    /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */
    T = gsl_rng_ranlxs2;
    r2 = gsl_rng_alloc(T);
    //seed = time(NULL) + chain;
    seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2);
    //printf("\nseed[%d]:%ld %ld\n", pthread_self(), seed, time(NULL));
    gsl_rng_set(r2, seed);
    //par->r2 = r2;

    //tmpmu = gsl_ran_gaussian(r2, 10.0);
    //printf("\nmu[%d]: %g", i, tmpmu);
    tmpmu = 1.0;
    for (i = 0; i < gibbsdata->len; ++i)
    {
        x[idim][i] = gsl_ran_gaussian_ziggurat(r2, tmpmu);
        //x[idim][i] = gsl_ran_exponential(r2, tmpmu);
        //printf("\n%5d %5d % 16.6f", idim, i, x[idim][i]);
        //data[i][j] = 0.0;
    }

    printf("SimGauss thread %3d DONE\n", idim);
    fflush(NULL);

    gsl_rng_free(r2);
    r2 = NULL;

    pthread_exit((void *) 0);
}


/*
 * SimGaussPth
 *
 * Generates Gaussian data in parallel: starts `thrdnum` threads running
 * sim_gauss_pth, thread i being told to fill data[i][0 .. ndata-1], then
 * waits for all of them to finish.
 *
 * data       table of rows to fill (one row per thread)
 * gibbsdata  one pre-allocated GibbsData per thread; x, idim and len are
 *            overwritten here
 * callThd    array receiving the thread handles
 * attr       attributes passed unchanged to pthread_create()
 * thrdnum    number of threads to start
 *
 * Any pthread_create()/pthread_join() failure prints a message and
 * terminates the program with EXIT_FAILURE.
 */
void
SimGaussPth(double **data, GibbsData **gibbsdata, pthread_t *callThd,
            pthread_attr_t *attr, const int thrdnum)
{
    const int      npoints = ndata;
    int            t;
    int            status;

    /* launch phase: describe each job, then start its thread */
    for (t = 0; t < thrdnum; ++t)
    {
        GibbsData *job = gibbsdata[t];

        job->x = data;
        job->idim = t;
        job->len = npoints;

        status = pthread_create(callThd + t, attr, sim_gauss_pth, job);
        if (status != 0)
        {
            printf("ERROR811: return code from pthread_create() %d is %d\n", t, status);
            exit(EXIT_FAILURE);
        }
    }

    /* join phase: wait for every worker, discarding its exit value */
    for (t = 0; t < thrdnum; ++t)
    {
        status = pthread_join(callThd[t], NULL);
        if (status != 0)
        {
            printf("ERROR812: return code from pthread_join() %d is %d\n", t, status);
            exit(EXIT_FAILURE);
        }
    }
}


/*
 * sim_expo_pth
 *
 * pthread start routine.  `gibbsdata_ptr` must point at a GibbsData; the
 * thread fills x[idim][0 .. len-1] with exponential draws
 * (gsl_ran_exponential with parameter 1.0), prints a completion line and
 * exits with a null status.
 *
 * Each thread allocates, seeds and frees its own ranlxs2 generator, so no
 * generator state is shared between threads.  The seed mixes the wall-clock
 * second with the thread id.
 */
static void
*sim_expo_pth(void *gibbsdata_ptr)
{
    GibbsData     *gibbsdata = (GibbsData *) gibbsdata_ptr;
    int            i;
    const int      idim = (const int) gibbsdata->idim;
    double       **x = gibbsdata->x;
    double         tmpmu;

    const gsl_rng_type    *T = NULL;
    gsl_rng               *r2 = NULL;
    unsigned long int      seed;

    /* Every thread gets its own rng generator -- otherwise, we get data race junk in valgrind */
    T = gsl_rng_ranlxs2;
    r2 = gsl_rng_alloc(T);
    /* '%' binds tighter than '+': only the thread id is reduced modulo the generator maximum */
    seed = time(NULL) + (unsigned long int) pthread_self() % gsl_rng_max(r2);
    gsl_rng_set(r2, seed);

    tmpmu = 1.0;
    for (i = 0; i < gibbsdata->len; ++i)
        x[idim][i] = gsl_ran_exponential(r2, tmpmu);

    /* Label this worker correctly; the message used to say "SimGauss" */
    printf("SimExpo thread %3d DONE\n", idim);
    fflush(NULL);

    gsl_rng_free(r2);
    r2 = NULL;

    pthread_exit((void *) 0);
}


/*
 * SimExpoPth
 *
 * Generates exponential data in parallel: starts `thrdnum` threads running
 * sim_expo_pth, thread i being told to fill data[i][0 .. ndata-1], then
 * waits for all of them to finish.
 *
 * data       table of rows to fill (one row per thread)
 * gibbsdata  one pre-allocated GibbsData per thread; x, idim and len are
 *            overwritten here
 * callThd    array receiving the thread handles
 * attr       attributes passed unchanged to pthread_create()
 * thrdnum    number of threads to start
 *
 * Any pthread_create()/pthread_join() failure prints a message and
 * terminates the program with EXIT_FAILURE.
 */
void
SimExpoPth(double **data, GibbsData **gibbsdata, pthread_t *callThd,
            pthread_attr_t *attr, const int thrdnum)
{
    const int      npoints = ndata;
    int            t;
    int            status;

    /* launch phase: describe each job, then start its thread */
    for (t = 0; t < thrdnum; ++t)
    {
        GibbsData *job = gibbsdata[t];

        job->x = data;
        job->idim = t;
        job->len = npoints;

        status = pthread_create(callThd + t, attr, sim_expo_pth, job);
        if (status != 0)
        {
            printf("ERROR811: return code from pthread_create() %d is %d\n", t, status);
            exit(EXIT_FAILURE);
        }
    }

    /* join phase: wait for every worker, discarding its exit value */
    for (t = 0; t < thrdnum; ++t)
    {
        status = pthread_join(callThd[t], NULL);
        if (status != 0)
        {
            printf("ERROR812: return code from pthread_join() %d is %d\n", t, status);
            exit(EXIT_FAILURE);
        }
    }
}


/*
 * SimGauss
 *
 * Serial data generator: fills the global table data[0 .. dim-1][0 .. ndata-1]
 * with zero-mean Gaussian draws of standard deviation 1.0, all taken from
 * the generator r2 in row-major order.
 */
void
SimGauss(const gsl_rng *r2)
{
    const double    sd = 1.0;   /* likelihood sigma; the true mean is 0 */
    int             row, col;

    printf("Simulating Gaussian data ...\n");
    fflush(NULL);

    for (row = 0; row < dim; ++row)
        for (col = 0; col < ndata; ++col)
            data[row][col] = gsl_ran_gaussian_ziggurat(r2, sd);
}


/*
 * SimExpo
 *
 * Serial data generator: fills the global table data[0 .. dim-1][0 .. ndata-1]
 * with exponential draws (gsl_ran_exponential with parameter 1.0), all taken
 * from the generator r2 in row-major order.  The parameter used for each row
 * is echoed to stdout before that row is generated.
 */
void
SimExpo(const gsl_rng *r2)
{
    int             row, col;
    double          mu;

    printf("Simulating exponential data ...\n");
    fflush(NULL);

    row = 0;
    while (row < dim)
    {
        mu = 1;   /* scale of the likelihood is fixed at 1.0 */
        printf("\nmu[%d]: %g", row, mu);

        for (col = 0; col < ndata; ++col)
            data[row][col] = gsl_ran_exponential(r2, mu);

        ++row;
    }

    printf("\n");
}


/*
 * CalcCumulants
 *
 * Computes the sufficient statistics of the global data table, one pass per
 * dimension:
 *
 *   y[j]   = sum over i of data[j][i]       (a sum, not a mean -- nothing
 *                                            here divides by ndata)
 *   x2[j]  = sum over i of data[j][i]^2
 *   yt     = sum over j of y[j]
 *   yt2    = sum over j of y[j]^2
 *   x2t    = sum over j of x2[j]
 *
 * All results are stored in the corresponding globals.
 */
void
CalcCumulants(void)
{
    int             d, k;

    printf("Calculate vector first and second cumulants ...\n");
    fflush(NULL);

    yt = 0.0;
    yt2 = 0.0;
    x2t = 0.0;

    for (d = 0; d < dim; ++d)
    {
        /* per-dimension sum and sum of squares */
        y[d] = 0.0;
        x2[d] = 0.0;
        for (k = 0; k < ndata; ++k)
        {
            y[d] += data[d][k];
            x2[d] += data[d][k]*data[d][k];
        }

        /* fold this dimension into the grand totals */
        yt += y[d];
        yt2 += y[d]*y[d];
        x2t += x2[d];
    }
}


/*
 * WriteChain
 *
 * Writes a chain to a text file, one sample per line.
 *
 * fname  path of the file to create or truncate
 * chain  table indexed as chain[column][sample]
 * n      number of samples (lines written)
 * d      number of columns per line
 *
 * Each value is printed with "%-18.8f " and the file ends with two extra
 * newlines.  If the file cannot be opened (or the final close reports an
 * error) a diagnostic is written to stderr and the function returns without
 * aborting the program; previously an unopenable file led to writing
 * through a NULL stream.
 */
void
WriteChain(char *fname, double **chain, const int n, const int d)
{
    FILE          *fp = NULL;
    int            i, j;

    if (fname == NULL)
    {
        fprintf(stderr, "ERROR: WriteChain called without a file name\n");
        return;
    }

    fp = fopen(fname ,"w");
    if (fp == NULL)
    {
        /* perror reports the reason fopen failed, prefixed by the path */
        perror(fname);
        return;
    }

    for (i = 0; i < n; ++i)
    {
        for (j = 0; j < d; ++j)
            fprintf(fp, "%-18.8f ", chain[j][i]);

        fprintf(fp, "\n");
    }

    fprintf(fp, "\n\n");

    /* fclose flushes buffered output, so a failure here means lost data */
    if (fclose(fp) != 0)
        perror(fname);

    fflush(NULL);
}


/*
 * GibbsGauss
 *
 * Multi-parameter Gaussian model: one mean per dimension, each sampled
 * directly from a Gaussian with variance 1/(ndata + lambda_0) centred on
 * y[j]/(lambda_0 + ndata).  No state is carried from one iteration to the
 * next.
 *
 * mu = 0
 * lambda = precision of prior mu
 *
 * lambda_0  prior precision of each mean
 * r2        GSL random number generator used for every draw
 *
 * Uses the globals y[], x2[], yt2 and x2t (presumably the sums and sums of
 * squares produced by CalcCumulants -- the call order is not visible here).
 *
 * Side effects: fills x[j][i], lnprior[i], lnlike[i], lnpost[i]; sets
 * avelnprior, avelnlike, avelnpost, avelnprlk, avelnprlk2 and varlnpost;
 * prints a report; writes "gibbs_gauss.txt" when write_files == 1.
 * A second sampling pass with mean y[j]/ndata and variance 1/ndata (the
 * "reference posterior") only prints its own summary and stores nothing.
 */
void
GibbsGauss(const double lambda_0, const gsl_rng *r2)
{
    int             i, j;
    double          postvar = 1.0 / (ndata + lambda_0);
    double          postsigma = sqrt(postvar);
    double          tmp, factor, musimj;
    //double          priorvar = 1.0 / lambda_0;
    const double    ln2pi2 = 0.5*log(2.0*M_PI);
    const double    ln2pi = log(2.0*M_PI);
    double          musim2, diffsum;

    /* Now sample posterior of mu with Gibbs */
    printf("Gibbs sampling ...\n");
    fflush(NULL);
    avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0;
    /* factor * y[j] is the centre of the sampling distribution for dimension j */
    factor = 1.0 / (lambda_0 + ndata);

    for (i = 0; i < iters; ++i)
    {
        lnprior[i] = lnpost[i] = lnlike[i] = 0.0;

        for (j = 0; j < dim; ++j)
        {
            x[j][i] = musimj = gsl_ran_gaussian_ziggurat(r2, postsigma) + factor * y[j];
            //x[j][i] = gsl_ran_exponential(r2, 1.0);
            //x[j][i] = gsl_ran_gamma(r2, 5.0, 1.0);
            //x[j][i] = gsl_ran_weibull(r2, 1.0, 2.0);
            /* unit-variance Gaussian log likelihood written in terms of x2[j] and y[j] */
            lnlike[i] += -ndata * ln2pi2 
                         -0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj);
        }

        /* squared norm of the sampled mean vector, needed by the prior */
        musim2 = 0.0;
        for (j = 0; j < dim; ++j)
            musim2 += x[j][i]*x[j][i];

        /* squared distance of the sample from the centre of the sampling distribution */
        diffsum = 0.0;
        for (j = 0; j < dim; ++j)
        {
            tmp = y[j] * factor - x[j][i];
            diffsum += tmp*tmp;
        }

        /* dim-dimensional isotropic Gaussian log densities: precision lambda_0
           about zero for the prior, precision lambda_0 + ndata about the
           sampling centre for the posterior */
        lnprior[i] = 0.5*(-dim*ln2pi + dim*log(lambda_0) - lambda_0 * musim2);
        lnpost[i]  = 0.5*(-dim*ln2pi + dim*log(lambda_0 + ndata) - (lambda_0 + ndata)*diffsum);

//         for (j = 0; j < dim; ++j)
//         {
//             tmpmu = y[j]*factor;
//             musim[j] = gsl_ran_gaussian(r2, postsigma) + tmpmu;
//             x[j][i] = musim[j];
//             lnprior[i] += normal_lnpdf(musim[j], 0.0, priorvar);
//             //for (k = 0; k < ndata; ++k)
//              //   lnlike[i] += normal_lnpdf(data[j][k], musim[j], 1.0);
//             lnlike[i] += -0.5 * ndata * log(2.0*M_PI) 
//                          -0.5 *(x2[j] - 2.0*musim[j]*y[j] + ndata*musim[j]*musim[j]);
//             lnpost[i] += normal_lnpdf(musim[j], tmpmu, postvar);
//        }

        //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]);

        avelnprior += lnprior[i];
        avelnlike  += lnlike[i];
        avelnpost  += lnpost[i];
        avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]);
    }

    /* turn the running sums into means */
    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    /* second pass: variance of (lnprior + lnlike) about its mean */
    varlnpost = 0.0;
    for (i = 0; i < iters; ++i)
    {
        tmp = lnprior[i] + lnlike[i] - avelnprlk;
        varlnpost += tmp * tmp;
    }

    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    /* "ave*" values are Monte Carlo averages; "exp*" values are closed-form expressions */
    //printf("\n%-22s% 16.4f", "log c:", -0.5 * log(2.0 * M_PI * varlnpost * M_E) + avelnlike + avelnprior); // this one is probably meaningless
    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "explnlike:", -0.5 * (dim * ndata * log(2.0 * M_PI) + x2t - yt2/ndata + dim)); // reference prior
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    double explnprior;
    explnprior = -0.5*dim*log(2.0 * M_PI)
                 +0.5*dim*log(lambda_0)
                 -0.5*dim*lambda_0/(lambda_0+ndata)
                 -0.5*lambda_0*yt2/((lambda_0+ndata)*(lambda_0+ndata));
    printf("\n%-22s% 16.4f", "explnprior:", explnprior);
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "explnpost:", -0.5 * dim * log(2.0*M_PI*M_E/ndata)); // reference prior
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
        WriteChain("gibbs_gauss.txt", x, iters, dim);

    /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */
    printf("Reference posterior Gibbs sampling ...\n");
    fflush(NULL);

    double pi_avelnlike = 0.0;
    double pi_avelnpost = 0.0;
    double pi_varlnpost = 0.0;
    double lnposti, lnlikei;
    double inv_ndata = 1.0/ndata;
    double pi_sigma = sqrt(inv_ndata);
    double yj_ndata;
    double delta;

    for (i = 0; i < iters; ++i)
    {
        lnposti = lnlikei = 0.0;
        for (j = 0; j < dim; ++j)
        {
            /* sampling distribution here: mean y[j]/ndata, variance 1/ndata */
            yj_ndata = y[j]*inv_ndata;
            musimj = gsl_ran_gaussian(r2, pi_sigma) + yj_ndata;
            lnlikei += -0.5 * ndata * ln2pi 
                       -0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj);
            lnposti += normal_lnpdf(musimj, yj_ndata, inv_ndata);
        }
 
        /* running mean and variance */
        /* pi_avelnpost is updated incrementally; pi_varlnpost accumulates the
           sum of squared deviations and is divided by iters when printed */
        delta = lnposti - pi_avelnpost;
        pi_avelnpost += delta/(i+1);
        pi_varlnpost += delta*(lnposti - pi_avelnpost);

        pi_avelnlike += lnlikei;
    }

    printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters);
    printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost);
    printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost);
    printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike/iters - pi_avelnpost));
    printf("\n%-22s% 16.4f", "pi_varlnpost:", pi_varlnpost/iters);
    printf("\n%-22s% 16.4f", "pi_DIC:", pi_avelnlike/iters - pi_varlnpost/iters);
    //printf("\n%-22s% 16.4f", "mean:", mean);
    printf("\n\n");
    fflush(NULL);
}


/*
 * GibbsGaussHierarch
 *
 * Two-level Gaussian model sampled by alternating draws:
 *   1. mu0 (stored in h[i]) is drawn from a Gaussian with variance
 *      phi_0/(dim*phi_0 + 1) centred on that variance times the sum of the
 *      previous iteration's means (the sum is 0 for the first iteration);
 *   2. each mean x[j][i] is drawn from a Gaussian with variance
 *      1/(ndata + 1) centred on (y[j] + mu0)/(ndata + 1).
 * Unlike the other samplers in this part of the file, each iteration
 * depends on the one before it through that sum.
 *
 * phi_0  variance of the top-level Gaussian on mu0 (it enters lnprior as
 *        -0.5*log(phi_0) - 0.5*mu0^2/phi_0)
 * r2     GSL random number generator used for every draw
 *
 * Uses the globals y[], x2[] and yt (presumably produced by CalcCumulants --
 * the call order is not visible here).
 *
 * Side effects: fills h[i], x[j][i], lnprior[i], lnlike[i], lnpost[i]; sets
 * avelnprior, avelnlike, avelnpost, avelnprlk, avelnprlk2 and varlnpost;
 * prints a report; writes "gibbs_gauss_hierarch.txt" and
 * "gibbs_gauss_hierarch_mu0.txt" when write_files == 1.
 */
void
GibbsGaussHierarch(const double phi_0, const gsl_rng *r2)
{
    int             i, j;
    double          postvar = 1.0 / (ndata + 1.0);
    double          postsigma = sqrt(postvar);
    double          tmp, musimj;
    const double    ln2pi2 = 0.5*log(2.0*M_PI);
    const double    ln2pi = log(2.0*M_PI);
    double          mu0sim, musum;
    double          postphi = phi_0/(dim*phi_0 + 1);
    double          phisigma = sqrt(postphi);


    /* Now sample posterior of mu with Gibbs */
    printf("Gibbs sampling ...\n");
    fflush(NULL);
    avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0;

    /* musum carries the sum of the previous iteration's means into the next mu0 draw */
    musum = 0.0;
    for (i = 0; i < iters; ++i)
    {
        lnprior[i] = lnpost[i] = lnlike[i] = 0.0;

        /* step 1: draw the shared mean mu0 given the previous means */
        h[i] = mu0sim = gsl_ran_gaussian_ziggurat(r2, phisigma) + postphi*musum;

        for (j = 0; j < dim; ++j)
        {
            /* step 2: draw mean j given the data sum y[j] and the fresh mu0 */
            x[j][i] = musimj = gsl_ran_gaussian_ziggurat(r2, postsigma) + postvar * (y[j] + mu0sim);

            /* unit-variance Gaussian log likelihood in terms of x2[j] and y[j] */
            lnlike[i]  += - ndata * ln2pi2
                          - 0.5 *(x2[j] - 2.0*musimj*y[j] + ndata*musimj*musimj);
            /* unit-variance Gaussian log density of mean j about mu0 */
            lnprior[i] += - 0.5*ln2pi - 0.5*(musimj - mu0sim)*(musimj - mu0sim);
            /* per-dimension terms of the log posterior; the terms that do
               not depend on j are added once after this loop */
            lnpost[i]  += - 0.5 * (ndata+1.0) * musimj * musimj
                          + mu0sim*musimj
                          + musimj*y[j]
                          - 0.5* y[j]*y[j]/(ndata+1.0);
        }

        /* zero-mean Gaussian log density of mu0 with variance phi_0 */
        lnprior[i] += - 0.5*ln2pi - 0.5*log(phi_0) - 0.5 * mu0sim * mu0sim / phi_0;
        /* remaining log-posterior terms (normalisation and the mu0-only part) */
        lnpost[i]  += - 0.5*(dim+1)*ln2pi
                      - 0.5*log(phi_0)
                      + 0.5*log(ndata*dim*phi_0+ndata+1.0)
                      + 0.5*(dim-1.0)*log(ndata+1.0)
                      - 0.5*mu0sim*mu0sim*(dim*phi_0+1.0)/phi_0
                      - 0.5*phi_0 * yt * yt / ((ndata+1.0)*(ndata*dim*phi_0+ndata+1.0));

        //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]);

        avelnprior += lnprior[i];
        avelnlike  += lnlike[i];
        avelnpost  += lnpost[i];
        avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]);

        /* refresh the sum of means for the next iteration's mu0 draw */
        musum = 0.0;
        for (j = 0; j < dim; ++j)
            musum += x[j][i];
    }

    /* turn the running sums into means */
    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    /* second pass: variance of (lnprior + lnlike) about its mean */
    varlnpost = 0.0;
    for (i = 0; i < iters; ++i)
    {
        tmp = lnprior[i] + lnlike[i] - avelnprlk;
        varlnpost += tmp * tmp;
    }

    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
    {
        WriteChain("gibbs_gauss_hierarch.txt", x, iters, dim);
        /* &h lets the single mu0 chain be written as a one-column table */
        WriteChain("gibbs_gauss_hierarch_mu0.txt", &h, iters, 1);
        fflush(NULL);
    }
}


/*
 * GibbsGaussUni
 *
 * Single-parameter Gaussian model: the one mean is sampled directly from a
 * Gaussian with variance 1/(nd + lambda_0) centred on yt/(lambda_0 + nd).
 * No state is carried from one iteration to the next.
 *
 * lambda_0  precision of the zero-mean Gaussian prior on the mean
 * r2        GSL random number generator used for every draw
 *
 * Uses the globals nd, yt and x2t (presumably the data count and the grand
 * sum / sum of squares from CalcCumulants -- the call order is not visible
 * here).
 *
 * Side effects: fills x[0][i], lnprior[i], lnlike[i], lnpost[i]; sets
 * avelnprior, avelnlike, avelnpost, avelnprlk, avelnprlk2 and varlnpost;
 * prints a report in which "ave*" values are Monte Carlo averages and "exp*"
 * values are closed-form expressions; writes "gibbs_gauss_uni.txt" when
 * write_files == 1.  A second sampling pass with mean yt/nd and variance
 * 1/nd (the "reference posterior") only prints its own summary.
 */
void
GibbsGaussUni(const double lambda_0, const gsl_rng *r2)
{
    int             i;
    double          postvar = 1.0 / (nd + lambda_0);
    double          postsigma = sqrt(postvar);
    double          tmpmu, tmp, musimj;
    double          priorvar = 1.0 / lambda_0;
    const double    ln2pi = log(2.0*M_PI);


    /* Now simulate posterior of mu with Gibbs */
    printf("Gibbs sampling Gaussian uniparameter ...\n");
    fflush(NULL);
    avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0;
    /* centre of the sampling distribution */
    tmpmu = yt/(lambda_0 + nd);
    for (i = 0; i < iters; ++i)
    {
        musimj = gsl_ran_gaussian(r2, postsigma) + tmpmu;
        x[0][i] = musimj;
        lnprior[i] = normal_lnpdf(musimj, 0.0, priorvar);
        /* unit-variance Gaussian log likelihood in terms of x2t and yt */
        lnlike[i] = -0.5 * nd * ln2pi
                    -0.5 *(x2t - 2.0*musimj*yt + nd*musimj*musimj);
        lnpost[i] = normal_lnpdf(musimj, tmpmu, postvar);

        avelnprior += lnprior[i];
        avelnlike  += lnlike[i];
        avelnpost  += lnpost[i];
        avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]);
    }

    /* turn the running sums into means */
    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    /* second pass: variance of (lnprior + lnlike) about its mean */
    varlnpost = 0.0;
    for (i = 0; i < iters; ++i)
    {
        tmp = lnprior[i] + lnlike[i] - avelnprlk;
        varlnpost += tmp * tmp;
    }

    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "explnlike:", -0.5 * (nd * log(2.0 * M_PI) + x2t - (yt*yt/nd) + 1.0)); // analytical exact, posterior expected ln like, reference prior
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "explnprior:", -0.5*(log(2.0 * M_PI) - log(lambda_0) + lambda_0 * (lambda_0 + nd + yt*yt)/((lambda_0+nd)*(lambda_0+nd)) )); // analytical exact
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "explnpost:", -0.5* (log(2.0*M_PI) - log(nd) + 1.0)); // analytical exact, reference prior
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
        WriteChain("gibbs_gauss_uni.txt", x, iters, 1);

    /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */
    printf("Reference posterior Gibbs sampling Gaussian uniparameter ...\n");
    fflush(NULL);

    double pi_avelnlike = 0.0;
    double pi_avelnpost = 0.0;
    double pi_varlnpost = 0.0;
    double lnposti, lnlikei;
    double delta;
    /* loop invariants: mean yt/nd and standard deviation sqrt(1/nd) */
    const double inv_nd = 1.0/nd;
    const double yt_nd = yt*inv_nd;
    const double pi_sigma = sqrt(inv_nd);

    for (i = 0; i < iters; ++i)
    {
        musimj = gsl_ran_gaussian(r2, pi_sigma) + yt_nd;
        lnlikei = -0.5 * nd * ln2pi
                  -0.5 *(x2t - 2.0*musimj*yt + nd*musimj*musimj);
        lnposti = normal_lnpdf(musimj, yt_nd, inv_nd);

        /* running mean and sum of squared deviations (divided by iters when printed) */
        delta = lnposti - pi_avelnpost;
        pi_avelnpost += delta/(i+1);
        pi_varlnpost += delta*(lnposti - pi_avelnpost);

        pi_avelnlike += lnlikei;
    }

    printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters);
    printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost);
    printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost);
    printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike/iters - pi_avelnpost));
    printf("\n%-22s% 16.4f", "pi_varlnpost:", pi_varlnpost/iters);
    printf("\n%-22s% 16.4f", "pi_DIC:", pi_avelnlike/iters - pi_varlnpost/iters);
    printf("\n\n");
    fflush(NULL);
}


/*
 * GibbsGaussPrecUni
 *
 * Single-parameter model in which the unknown is the precision of a Gaussian
 * likelihood.  The prior on the precision is exponential with rate lambda_0
 * (a gamma with alpha = 1), so each sample is drawn directly from a gamma
 * with shape 1 + nd/2 and rate lambda_0 + x2t/2.  No state is carried from
 * one iteration to the next.
 *
 * lambda_0  rate of the exponential prior
 * r2        GSL random number generator used for every draw
 *
 * Uses the globals nd and x2t.
 *
 * Side effects: fills x[0][i], lnprior[i], lnlike[i], lnpost[i]; sets
 * avelnprior, avelnlike, avelnpost, avelnprlk, avelnprlk2 and varlnpost;
 * prints a report; writes "gibbs_gauss_prec_uni.txt" when write_files == 1.
 * A second sampling pass from a gamma with shape nd/2 and rate x2t/2 (the
 * "reference posterior") only prints its own summary.
 */
void
GibbsGaussPrecUni(const double lambda_0, const gsl_rng *r2)
{
    const double    log2pi = log(2.0 * M_PI);
    double          shape, rate, scale;
    double          lam, loglam, joint, dev;
    int             k;

    printf("Gibbs sampling Gaussian precision uniparameter ...\n");
    fflush(NULL);

    avelnprior = 0.0;
    avelnlike = 0.0;
    avelnpost = 0.0;
    avelnprlk2 = 0.0;

    /* gamma posterior; gsl_ran_gamma takes a scale, i.e. 1/rate */
    shape = 1.0+0.5*nd;
    rate = lambda_0 + 0.5*x2t;
    scale = 1.0/rate;

    for (k = 0; k < iters; ++k)
    {
        lam = gsl_ran_gamma(r2, shape, scale);
        loglam = log(lam);
        x[0][k] = lam;

        lnprior[k] = log(lambda_0) - lam * lambda_0;
        lnlike[k] = -0.5*nd*log2pi + 0.5*nd * loglam - 0.5*lam*x2t;
        lnpost[k] = shape * log(rate) + (shape-1.0)*loglam-rate*lam - lgamma(shape);

        joint = lnprior[k] + lnlike[k];

        avelnprior += lnprior[k];
        avelnlike  += lnlike[k];
        avelnpost  += lnpost[k];
        avelnprlk2 += joint * joint;
    }

    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    /* second pass: variance of (lnprior + lnlike) about its mean */
    varlnpost = 0.0;
    for (k = 0; k < iters; ++k)
    {
        dev = lnprior[k] + lnlike[k] - avelnprlk;
        varlnpost += dev * dev;
    }
    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    /* closed-form expectations of log(precision) and precision under the gamma above */
    double e_loglam = gsl_sf_psi((nd+2.0)/2.0) - log(lambda_0+0.5*x2t);
    double e_lam = (nd+2.0)/(2.0 * lambda_0+x2t);
    double e_lnlike = -0.5*nd * log(2.0*M_PI) + 0.5*nd*e_loglam - 0.5*x2t * e_lam;

    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "explnlike:", e_lnlike); // analytical exact, posterior expected ln like

    /* same expectation with the prior rate set to zero */
    e_loglam = gsl_sf_psi((nd+2.0)/2.0) - log(0.5*x2t);
    e_lnlike = 0.5*nd * (e_loglam - log(2.0*M_PI) - 1.0) - 1.0;

    printf("\n%-22s% 16.4f", "explnlike(ref):", e_lnlike); // analytical exact, posterior expected ln like, reference prior (beta=0)
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
        WriteChain("gibbs_gauss_prec_uni.txt", x, iters, 1);

    /* Reference-posterior pass: nothing is stored, only summaries are printed */
    printf("Reference posterior Gibbs sampling Gaussian uniparameter ...\n");
    fflush(NULL);

    double ref_sum_lnlike = 0.0;
    double ref_mean_lnpost = 0.0;
    double ref_ssq_lnpost = 0.0;
    double ref_lnpost, ref_lnlike;
    double step;

    shape = 0.5*nd;
    rate = 0.5*x2t;
    scale = 1.0/rate;

    for (k = 0; k < iters; ++k)
    {
        lam = gsl_ran_gamma(r2, shape, scale);
        loglam = log(lam);

        ref_lnlike = -0.5*nd*log2pi + 0.5*nd * loglam - 0.5*lam*x2t;
        ref_lnpost = shape * log(rate) + (shape-1.0)*loglam-rate*lam - lgamma(shape);

        /* running mean and sum of squared deviations */
        step = ref_lnpost - ref_mean_lnpost;
        ref_mean_lnpost += step/(k+1);
        ref_ssq_lnpost += step*(ref_lnpost - ref_mean_lnpost);

        ref_sum_lnlike += ref_lnlike;
    }

    printf("\n%-22s% 16.4f", "pi_avelnlike:", ref_sum_lnlike/iters);
    printf("\n%-22s% 16.4f", "pi_avelnpost:", ref_mean_lnpost);
    printf("\n%-22s% 16.4f", "posterior pi-entropy:", -ref_mean_lnpost);
    printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (ref_sum_lnlike/iters - ref_mean_lnpost));
    printf("\n%-22s% 16.4f", "pi_varlnpost:", ref_ssq_lnpost/iters);
    printf("\n%-22s% 16.4f", "pi_DIC:", ref_sum_lnlike/iters - ref_ssq_lnpost/iters);
    printf("\n\n");
    fflush(NULL);
}


void
GibbsExpo(const double alpha_0, const gsl_rng *r2)
{
    int             i, j;
    double          tmp, musimj;
    const double    beta_0 = alpha_0;


    /* Now simulate posterior of mu with Gibbs */
    printf("Gibbs sampling ...\n");
    fflush(NULL);

    avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0;
    for (i = 0; i < iters; ++i)
    {
        lnprior[i] = lnlike[i] = lnpost[i] = 0.0;

        for (j = 0; j < dim; ++j)
        {
            musim[j] = gsl_ran_gamma(r2, alpha_0 + ndata, 1.0/(beta_0 + y[j]));
            x[j][i] = musim[j];
            lnprior[i] += alpha_0 * log(beta_0) - lgamma(alpha_0)
                          + (alpha_0-1.0)*log(musim[j]) - beta_0*musim[j];
            lnlike[i] += ndata*log(musim[j]) - musim[j]*y[j];
            lnpost[i] += (alpha_0 + ndata)*log(beta_0+y[j]) - lgamma(alpha_0+ndata)
                        +(alpha_0+ndata-1.0)*log(musim[j])
                        - musim[j]*(beta_0+y[j]);
        }

        //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]);

        avelnprior += lnprior[i];
        avelnlike  += lnlike[i];
        avelnpost  += lnpost[i];
        avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]);
    }

    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    varlnpost = 0.0;
    for (i = 0; i < iters; ++i)
    {
        tmp = lnprior[i] + lnlike[i] - avelnprlk;
        varlnpost += tmp * tmp;
    }

    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    //printf("\n%-22s% 16.4f", "log c:", -0.5 * log(2.0 * M_PI * varlnpost * M_E) + avelnlike + avelnprior); // this one is probably meaningless
    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
        WriteChain("gibbs_expo.txt", x, iters, dim);

    /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */
    printf("Reference posterior Gibbs sampling ...\n");
    fflush(NULL);

    double pi_avelnlike = 0.0;
    double pi_avelnpost = 0.0;
    double lnposti, lnlikei;

    for (i = 0; i < iters; ++i)
    {
        lnposti = lnlikei = 0.0;
        for (j = 0; j < dim; ++j)
        {
            musimj = gsl_ran_gamma(r2, ndata, 1.0/y[j]);
            lnlikei += ndata*log(musimj) - musimj*y[j];
            lnposti += ndata*log(y[j]) - lgamma(ndata)
                        +(ndata-1.0)*log(musimj)
                        - musimj*y[j];
        }

        pi_avelnlike += lnlikei;
        pi_avelnpost += lnposti;
    }

    printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters);
    printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost/iters);
    printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost/iters);
    printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike - pi_avelnpost)/iters);
    printf("\n\n");
    fflush(NULL);
}


void
GibbsExpoUni(const double alpha_0, const gsl_rng *r2)
{
    int             i;
    double          tmp, musimj;
    const double    beta_0 = alpha_0;


    /* Now simulate posterior of mu with Gibbs */
    printf("Gibbs sampling ...\n");
    fflush(NULL);

    avelnprior = avelnlike = avelnpost = avelnprlk2 = 0.0;
    for (i = 0; i < iters; ++i)
    {
        musimj = gsl_ran_gamma(r2, alpha_0 + nd, 1.0/(beta_0 + yt));
        x[0][i] = musimj;
        lnprior[i] = alpha_0 * log(beta_0) - lgamma(alpha_0)
                     + (alpha_0-1.0)*log(musimj) - beta_0*musimj;
        lnlike[i] = nd*log(musimj) - musimj*yt;
        lnpost[i] = (alpha_0 + nd)*log(beta_0+yt) - lgamma(alpha_0+nd)
                    +(alpha_0+nd-1.0)*log(musimj)
                    - musimj*(beta_0+yt);

        //printf("\n%-d% 16.4f % 16.4f % 16.4f", i, lnprior[i], lnlike[i], lnpost[i]);

        avelnprior += lnprior[i];
        avelnlike  += lnlike[i];
        avelnpost  += lnpost[i];
        avelnprlk2 += (lnprior[i] + lnlike[i]) * (lnprior[i] + lnlike[i]);
    }

    avelnprior /= iters;
    avelnlike  /= iters;
    avelnpost  /= iters;
    avelnprlk = avelnprior + avelnlike;
    avelnprlk2 /= iters;

    varlnpost = 0.0;
    for (i = 0; i < iters; ++i)
    {
        tmp = lnprior[i] + lnlike[i] - avelnprlk;
        varlnpost += tmp * tmp;
    }

    varlnpost /= iters;

    printf("Gibbs done ...\n");
    fflush(NULL);

    printf("\n%-22s% 16.4f", "varlnpost:", varlnpost);
    printf("\n%-22s% 16.4f", "avelnlike:", avelnlike);
    printf("\n%-22s% 16.4f", "explnlike:", nd * (gsl_sf_psi(nd) - log(yt) - 1.0));
    printf("\n%-22s% 16.4f", "avelnprior:", avelnprior);
    printf("\n%-22s% 16.4f", "explnprior:", alpha_0 * log(beta_0) - lgamma(alpha_0) 
                                            + (alpha_0-1.0) * (gsl_sf_psi(alpha_0 + nd) - log(beta_0+yt)) 
                                            - beta_0 * (alpha_0+nd)/(beta_0+yt) ); // analytical exact
    printf("\n%-22s% 16.4f", "avelnprlk:", avelnprlk);
    printf("\n%-22s% 16.4f", "avelnpost:", avelnpost);
    printf("\n%-22s% 16.4f", "explnpost:", log(yt) - lgamma(nd) + gsl_sf_psi(nd)*(nd-1.0) - nd);
    printf("\n%-22s% 16.4f", "sqrt avelnprlk2:", sqrt(avelnprlk2));
    printf("\n%-22s% 16.4f", "entropy ln post:", 0.5 * log(2.0 * M_PI * varlnpost * M_E));
    printf("\n%-22s% 16.4f", "posterior entropy:", - avelnpost);
    printf("\n%-22s% 16.4f", "exp lnlik + entropy:", avelnlike - avelnpost);
    printf("\n%-22s% 16.4f", "DIC:", avelnlike - varlnpost);
    printf("\n%-22s% 16.4f", "exact exp ml:", avelnlike + avelnprior - avelnpost);
    printf("\n\n");
    fflush(NULL);

    if (write_files == 1)
        WriteChain("gibbs_expo_uni.txt", x, iters, 1);

    /* Gibbs with reference priors, calculate exp(lnpost) and exp(lnlike) */
    printf("Reference posterior Gibbs sampling ...\n");
    fflush(NULL);

    double pi_avelnlike = 0.0;
    double pi_avelnpost = 0.0;
    double lnposti, lnlikei;

    for (i = 0; i < iters; ++i)
    {
        musimj = gsl_ran_gamma(r2, nd, 1.0/yt);
        lnlikei = nd*log(musimj) - musimj*yt;
        lnposti = nd*log(yt) - lgamma(nd)
                    +(nd-1.0)*log(musimj)
                    - musimj*yt;

        pi_avelnlike += lnlikei;
        pi_avelnpost += lnposti;
    }

    printf("\n%-22s% 16.4f", "pi_avelnlike:", pi_avelnlike/iters);
    printf("\n%-22s% 16.4f", "pi_avelnpost:", pi_avelnpost/iters);
    printf("\n%-22s% 16.4f", "posterior pi-entropy:", -pi_avelnpost/iters);
    printf("\n%-22s% 16.4f", "pi lnlik + entropy:", (pi_avelnlike - pi_avelnpost)/iters);
    printf("\n\n");
    fflush(NULL);
}


/*
 * Usage
 *
 * Print the command-line help banner to stdout and flush all open
 * output streams.
 */
void
Usage(void)
{
    static const char   rule[] =
        "I===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-===-==I\n";

    static const char   help[] =
        "  Usage: \n"
        "    marg [options] <data file> \n\n"
        "    -b  burnin (as a fraction) \n"
        "    -d  # of dimensions in models \n"
        "    -e  exponential models \n"
        "    -f  write samples to file \n"
        "    -g  Gaussian models \n"
        "    -H  calculate entropy of data in file \n"
        "    -i  # of samples or sampling iterations \n"
        "    -l  lambda, prior precision \n"
        "    -n  # of data points per dimension \n"
        "    -p  parallel simulation \n"
        "    -s  seed for random number generators \n";

    fputs("\n                           <  BEGIN MARG  > \n", stdout);
    fputs(rule, stdout);
    fputs(help, stdout);
    fputs(rule, stdout);
    fputs("                            <  END MARG  > \n\n\n", stdout);
    fflush(NULL);
}


void
GetOpts(int argc, char *argv[])
{
    int            option;

    /* get the options */
    while ((option = getopt(argc, argv, "b:d:efgHi:l:m:n:ps:t:")) != -1)
    {
        switch (option)
        {
/*
            case 'P':
                sscanf(optarg, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf",
                       &prior[0], &prior[1], &prior[2], &prior[3],
                       &prior[4], &prior[5], &prior[6], &prior[7],
                       &prior[8], &prior[9], &prior[10], &prior[11]);
                for (i = 0; i < dim; ++i)
                    prior[i] *= 0.5;
                break;
*/
            case 'b':
                burnin = (double) strtod(optarg, NULL);

                if (burnin > 0.0 && burnin < 1.0)
                    burnin = 1.0 - burnin;
                else
                    burnin = 0.5;
                break;

            case 'd':
                dim = (int) strtol(optarg, NULL, 10);
                break;

            case 'e':
                expo_model = 1;
                break;

            case 'f':
                write_files = 1;
                break;

            case 'g':
                gauss_model = 1;
                break;

            case 'H':
                entropy_calc = 1;
                break;

            case 'i':
                iters = (int) strtol(optarg, NULL, 10);
                break;

            case 'l':
                lambda_0 = (double) strtod(optarg, NULL);
                break;

            case 'n':
                ndata = (double) strtod(optarg, NULL);
                break;

            case 'p':
                parallel = 1;
                break;

            case 's':
                seed = (int) strtol(optarg, NULL, 10);
                break;

            case 't':
                thrdnum = (int) strtol(optarg, NULL, 10);
                break;

            default:
                perror("\n\n  ERROR");
                fprintf(stderr, "\nBad option '-%c' \n", optopt);
                Usage();
                exit(EXIT_FAILURE);
                break;
        }
    }
}


/*
 * DieIfNull
 *
 * Exit with EXIT_FAILURE and a message on stderr when an allocation
 * returned NULL.  `what` names the buffer in the message.
 */
static void
DieIfNull(const void *ptr, const char *what)
{
    if (ptr == NULL)
    {
        fprintf(stderr, "\n\n  ERROR: could not allocate memory for %s \n", what);
        exit(EXIT_FAILURE);
    }
}


/*
 * main
 *
 * Program flow:
 *   1. Parse options (GetOpts) and allocate the global work arrays.
 *   2. With -H: fill x via RandVec, print its Edgeworth/Van Hulle entropy
 *      estimate, and exit.
 *   3. Otherwise simulate data with SimExpo (or SimExpoPth with -p) and
 *      call CalcCumulants.
 *   4. With -g: run the hierarchical, multi-parameter, one-parameter
 *      location and one-parameter precision Gaussian samplers in turn.
 *      With -e: run the multi-parameter and one-parameter exponential
 *      samplers.  After each sampler the Laplace estimate, the
 *      harmonic-mean estimate and the analytical marginal likelihood are
 *      printed.
 *   5. Release everything and exit with EXIT_SUCCESS.
 *
 * With no arguments at all, the usage banner is printed and the program
 * exits with EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
    int            i;
    double         hme, marglik;

    const gsl_rng_type     *T = NULL;
    gsl_rng                *r2 = NULL;

    if (argc == 1)
    {
        Usage();
        exit(EXIT_FAILURE);
    }

    GetOpts(argc, argv);

    /* dim and iters are used below as allocation counts and divisors */
    if (dim < 1 || iters < 1)
    {
        fprintf(stderr, "\n\n  ERROR: -d and -i must both be positive (got %d and %d) \n",
                dim, iters);
        Usage();
        exit(EXIT_FAILURE);
    }

    argv += optind; /* now argv is set with first arg = argv[0] */

    /* one worker per dimension; this replaces any value given with -t */
    thrdnum = dim;

    GibbsData    **gibbsdata = malloc(thrdnum * sizeof(GibbsData *));
    pthread_t     *callThd = malloc(thrdnum * sizeof(pthread_t));
    pthread_attr_t  attr;

    DieIfNull(gibbsdata, "gibbsdata");
    DieIfNull(callThd, "callThd");

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);

    for (i = 0; i < thrdnum; ++i)
    {
        gibbsdata[i] = malloc(sizeof(GibbsData));
        DieIfNull(gibbsdata[i], "gibbsdata[i]");
    }

    nd = ndata * dim;

    /* seed == 0 means "seed from the clock" */
    gsl_rng_env_setup();
    if (seed == 0)
        gsl_rng_default_seed = time(NULL);
    else
        gsl_rng_default_seed = seed;
    T = gsl_rng_ranlxd2;
    r2 = gsl_rng_alloc(T);

    cov = MatAlloc(dim, dim);
    lnpost = calloc(iters, sizeof(double));
    DieIfNull(lnpost, "lnpost");
    lnlike = calloc(iters, sizeof(double));
    DieIfNull(lnlike, "lnlike");
    lnprior = calloc(iters, sizeof(double));
    DieIfNull(lnprior, "lnprior");
    h = calloc(iters, sizeof(double));
    DieIfNull(h, "h");
    pave = calloc(dim, sizeof(double));
    DieIfNull(pave, "pave");
    x = MatAlloc(dim, iters);
    data = calloc(dim, sizeof(double *));
    DieIfNull(data, "data");
    for (i = 0; i < dim; ++i)
    {
        data[i] = calloc(ndata, sizeof(double));
        DieIfNull(data[i], "data[i]");
    }
    y = calloc(dim, sizeof(double));
    DieIfNull(y, "y");
    musim = calloc(dim, sizeof(double));
    DieIfNull(musim, "musim");
    x2 = calloc(dim, sizeof(double));
    DieIfNull(x2, "x2");

    /************************************************************************************/
    /* entropy-only mode: nothing below this block runs */
    if (entropy_calc == 1)
    {
        double         entropy;

        RandVec(x, dim, iters, r2);

        entropy = CalcEdgeworthVanHulleEntropy(x, dim, iters);

        printf("\n-d ln(n):   %14.3f", -dim * log(ndata));
        printf("\nentropy:    %14.3f", entropy);
        printf ("\n\n");
        fflush(NULL);

        exit(EXIT_SUCCESS);
    }

    /************************************************************************************/
    /* NOTE(review): data are always simulated with the exponential
       simulators, also when only -g is given; the Gaussian simulators
       are left commented out here. */
    if (parallel == 1)
    {
        //SimGaussPth(data, gibbsdata, callThd, &attr, thrdnum);
        SimExpoPth(data, gibbsdata, callThd, &attr, thrdnum);
    }
    else
    {
        //SimGauss(r2);
        SimExpo(r2);
    }

    CalcCumulants();

    /************************************************************************************/
    /* hierarchical gaussian model */
    if (gauss_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nHierarchical gaussian model:\n");
        double phi_0 = 1.0 / lambda_0;
        GibbsGaussHierarch(phi_0, r2);

        CalcLaplaceMet();

        printf("\n%-22s% 14d", "dim:", dim);

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        /* gaussian model, hyperprior mu=0 */
        marglik = -0.5*(ndata*dim)* log(2.0*M_PI)
                  -0.5*log(ndata*dim*phi_0 + ndata + 1.0)
                  -0.5*(dim-1.0)*log(ndata + 1.0)
                  -0.5*x2t
                  +0.5*yt2/(ndata+1.0)
                  +0.5*phi_0 * yt * yt / ((ndata+1.0)*(ndata*dim*phi_0+ndata+1.0));

        printf("\nanalytical marginal likelihood: %16.4f\n", marglik);
    }

    /************************************************************************************/
    /* gaussian model */
    if (gauss_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nGaussian model:\n");
        GibbsGauss(lambda_0, r2);
        CalcLaplaceMet();

        printf("\n%-22s% 14d", "dim:", dim);

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        marglik = -0.5*(ndata*dim)* log(2.0*M_PI)
                  +0.5*dim*log(lambda_0/(lambda_0+ndata))
                  -0.5*x2t
                  +0.5*yt2/(lambda_0+ndata);

        printf("\nanalytical marginal likelihood: %16.4f\n", marglik);
    }

    /************************************************************************************/
    /* gaussian one-param model, unknown mu location parameter */
    if (gauss_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nGaussian one-parameter model:\n");
        GibbsGaussUni(lambda_0, r2);
        CalcLaplaceMetUni();

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        marglik = 0.5*(- x2t + yt*yt/(lambda_0+nd)
                       - nd*log(2.0*M_PI)
                       + log(lambda_0/(lambda_0 + nd)));

        printf("\nanalytical marginal likelihood: %16.4f\n", marglik);
    }

    /************************************************************************************/
    /* gaussian one-param model, known mu=0, unknown lambda precision parameter */
    if (gauss_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nGaussian one-parameter precision model:\n");
        GibbsGaussPrecUni(lambda_0, r2);
        CalcLaplaceMetUni();

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        double atmp, btmp;

        atmp = 1.0+0.5*nd;
        btmp = lambda_0 + 0.5*x2t;

        marglik = - 0.5*nd*log(2.0*M_PI) +log(lambda_0) - atmp*log(btmp) + lgamma(atmp);
        printf("\nanalytical marginal likelihood (normal mu=0): %16.4f\n", marglik);
    }

    /************************************************************************************/
    /* Exponential model */
    if (expo_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nExponential model:\n");
        GibbsExpo(lambda_0, r2);
        CalcLaplaceMet();

        printf("\n%-22s% 14d", "dim:", dim);

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        /* first accumulate sum_i log(lambda_0 + y[i]), then fold it into the result */
        marglik = 0.0;
        for (i = 0; i < dim; ++i)
            marglik += log(lambda_0+y[i]);

        marglik = dim *(lgamma(lambda_0+ndata)-lgamma(lambda_0)+lambda_0*log(lambda_0))
                  - (lambda_0+ndata)*marglik;

        printf("\nanalytical marginal likelihood: %16.4f\n", marglik);
    }

    /************************************************************************************/
    /* Exponential one-param model */
    if (expo_model == 1)
    {
        printf("\n************************************************************************************");
        printf("\nExponential one-parameter model:\n");
        GibbsExpoUni(lambda_0, r2);
        CalcLaplaceMetUni();

        hme = CalcHarmonicMean(lnlike, iters);
        printf("\n%-22s% 16.4f\n", "hme:", hme);
        fflush(NULL);

        /* depends only on the totals nd and yt, not on the per-dimension y[i] */
        marglik = lgamma(lambda_0+nd) - lgamma(lambda_0) + lambda_0*log(lambda_0)
                  - (lambda_0+nd)*log(lambda_0+yt);

        printf("\nanalytical marginal likelihood: %16.4f\n", marglik);
    }

    /************************************************************************************/
    printf("\n");
    fflush(NULL);

    /************************************************************************************/
    for (i = 0; i < dim; ++i)
        free(data[i]);
    free(data);
    MatDestroy(&x);
    free(lnpost);
    free(lnlike);
    free(lnprior);
    free(h);
    free(pave);
    free(y);
    free(musim);
    free(x2);
    MatDestroy(&cov);

    pthread_attr_destroy(&attr);
    for (i = 0; i < thrdnum; ++i)
        free(gibbsdata[i]);
    free(gibbsdata);
    free(callThd);

    gsl_rng_free(r2);
    r2 = NULL;

    exit(EXIT_SUCCESS);
}


