/************************************************************************************\
*                                                                                    *
* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.                *
*                                                                                    *
* Redistribution and use in source and binary forms, with or without modification,   *
* are permitted provided that the following conditions are met:                      *
*                                                                                    *
*  · Redistributions of source code must retain the above copyright notice, this     *
*    list of conditions and the following disclaimer.                                *
*                                                                                    *
*  · Redistributions in binary form must reproduce the above copyright notice, this  *
*    list of conditions and the following disclaimer in the documentation and/or     *
*    other materials provided with the distribution.                                 *
*                                                                                    *
*  · The name of EWM may not be used to endorse or promote products derived from     *
*    this software without specific prior written permission.                        *
*                                                                                    *
* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,    *
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND       *
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE   *
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY      *
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     *
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN  *
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      *
*                                                                                    *
* For any issues regarding this software and its use, contact EWM at:                *
*                                                                                    *
*   Eugene W. Myers Jr.                                                              *
*   Bautzner Str. 122e                                                               *
*   01099 Dresden                                                                    *
*   GERMANY                                                                          *
*   Email: gene.myers@gmail.com                                                      *
*                                                                                    *
\************************************************************************************/

/*******************************************************************************************
 *
 *  Compressed data base module.  Auxiliary routines to open and manipulate a data base for
 *    which the sequence and read information are separated into two separate files, and the
 *    sequence is compressed into 2-bits for each base.  Support for tracks of additional
 *    information, and trimming according to the current partition.  Eventually will also
 *    support compressed quality information.
 *
 *  Author :  Gene Myers
 *  Date   :  July 2013
 *  Revised:  April 2014
 *
 ********************************************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <dirent.h>

#include "DB.h"

//  PATHSEP is the separator this file places between a directory and the name of a DB's
//    auxiliary files (e.g. the .idx file): building with HIDE_FILES defined makes it "/."
//    so that those file names begin with a '.'.
#ifdef HIDE_FILES
#define PATHSEP "/."
#else
#define PATHSEP "/"
#endif


/*******************************************************************************************
 *
 *  GENERAL UTILITIES
 *
 ********************************************************************************************/

//  Name prefixed to the diagnostics the routines below write to stderr.  The code visible
//    here only reads it -- presumably the main program sets it before calling in (confirm).
char *Prog_Name;

//  malloc wrapper: request 'size' bytes and, if that fails, say so on stderr, appending
//    'mesg' in parentheses when one is supplied.  Whatever malloc returned (NULL included)
//    is handed back, so the caller still has to test the result.

void *Malloc(int64 size, char *mesg)
{ void *block = malloc(size);

  if (block != NULL)
    return (block);

  if (mesg != NULL)
    fprintf(stderr,"%s: Out of memory (%s)\n",Prog_Name,mesg);
  else
    fprintf(stderr,"%s: Out of memory\n",Prog_Name);
  return (NULL);
}

//  realloc wrapper: resize 'p' to 'size' bytes and, if realloc returns NULL, report it on
//    stderr (with 'mesg' in parentheses when supplied).  The value realloc produced is
//    returned unchanged; the caller's own copy of 'p' is not touched by this routine.

void *Realloc(void *p, int64 size, char *mesg)
{ void *resized = realloc(p,size);

  if (resized == NULL)
    { if (mesg != NULL)
        fprintf(stderr,"%s: Out of memory (%s)\n",Prog_Name,mesg);
      else
        fprintf(stderr,"%s: Out of memory\n",Prog_Name);
    }
  return (resized);
}

//  strdup wrapper: a NULL 'name' yields NULL silently; otherwise return a heap copy of
//    'name', reporting on stderr (with 'mesg' when supplied) if the copy cannot be made.

char *Strdup(char *name, char *mesg)
{ char *copy;

  if (name == NULL)
    return (NULL);

  copy = strdup(name);
  if (copy != NULL)
    return (copy);

  if (mesg != NULL)
    fprintf(stderr,"%s: Out of memory (%s)\n",Prog_Name,mesg);
  else
    fprintf(stderr,"%s: Out of memory\n",Prog_Name);
  return (NULL);
}

//  fopen wrapper: returns NULL without comment if either argument is NULL, otherwise the
//    stream from fopen, writing a "Cannot open" diagnostic to stderr when fopen fails.

FILE *Fopen(char *name, char *mode)
{ FILE *stream;

  if (name == NULL || mode == NULL)
    return (NULL);

  stream = fopen(name,mode);
  if (stream == NULL)
    fprintf(stderr,"%s: Cannot open %s for '%s'\n",Prog_Name,name,mode);
  return (stream);
}

//  Return a freshly allocated copy of the directory part of 'name': everything before its
//    last '/', or "." when 'name' contains no '/'.  A name whose only '/' is its first
//    character yields the empty string.  NULL is returned for a NULL 'name' or when memory
//    runs out (reported on stderr by Malloc/Strdup).  The caller frees the result.
//  'name' is only read, so string literals and read-only buffers are safe to pass.

char *PathTo(char *name)
{ char  *slash, *path;
  size_t plen;

  if (name == NULL)
    return (NULL);

  slash = strrchr(name,'/');
  if (slash == NULL)
    return (Strdup(".","Allocating default path"));

  plen = (size_t) (slash - name);
  path = (char *) Malloc(plen+1,"Extracting path from");
  if (path != NULL)
    { memcpy(path,name,plen);
      path[plen] = '\0';
    }
  return (path);
}

//  Return a freshly allocated copy of the file-name part of 'name' (what follows its last
//    '/') with an extension removed:
//      suffix == NULL: everything from the first '.' of the file name on is dropped.
//      suffix != NULL: 'suffix' is dropped if the file name ends with it, compared without
//                      regard to case, and is strictly longer than it; otherwise the file
//                      name is returned whole.
//    NULL is returned for a NULL 'name' or when memory runs out (reported by Malloc).
//  'name' is only read: no terminator is poked into the caller's string, so there is also
//    nothing to restore afterwards.

char *Root(char *name, char *suffix)
{ char  *base, *dot, *root, *mesg;
  size_t blen, slen, keep;

  if (name == NULL)
    return (NULL);

  base = strrchr(name,'/');
  if (base == NULL)
    base = name;
  else
    base += 1;
  blen = strlen(base);

  mesg = "Extracting root from";
  keep = blen;
  if (suffix == NULL)
    { dot = strchr(base,'.');
      if (dot != NULL)
        keep = (size_t) (dot - base);
    }
  else
    { slen = strlen(suffix);
      if (blen > slen && strcasecmp(base+(blen-slen),suffix) == 0)
        keep = blen - slen;
      else
        mesg = "Allocating root";
    }

  root = (char *) Malloc(keep+1,mesg);
  if (root != NULL)
    { memcpy(root,base,keep);
      root[keep] = '\0';
    }
  return (root);
}

//  Return the concatenation path+sep+root+suffix, or NULL if any argument is NULL or the
//    result buffer cannot be grown (reported on stderr).
//  The result lives in a single static buffer owned by this routine: it is overwritten by
//    the next call and must not be freed by the caller.
//  The buffer and its recorded capacity are only replaced once realloc has succeeded, so a
//    failed call leaves the previous buffer usable for later, shorter requests.

char *Catenate(char *path, char *sep, char *root, char *suffix)
{ static char *cat = NULL;
  static int   max = -1;
  int len;

  if (path == NULL || root == NULL || sep == NULL || suffix == NULL)
    return (NULL);
  len =  strlen(path);
  len += strlen(sep);
  len += strlen(root);
  len += strlen(suffix);
  if (len > max)
    { int   nmax = ((int) (1.2*len)) + 100;      //  Grow with slack to limit reallocations
      char *ncat = (char *) realloc(cat,nmax+1);

      if (ncat == NULL)
        { fprintf(stderr,"%s: Out of memory (Making path name for %s)\n",Prog_Name,root);
          return (NULL);
        }
      cat = ncat;
      max = nmax;
    }
  sprintf(cat,"%s%s%s%s",path,sep,root,suffix);
  return (cat);
}

//  Return the string left+<num in decimal>+right, or NULL if 'left' or 'right' is NULL or
//    the result buffer cannot be grown (reported on stderr).
//  The result lives in a single static buffer owned by this routine: it is overwritten by
//    the next call and must not be freed by the caller.  40 bytes are budgeted for the
//    digits of 'num'.
//  The buffer and its recorded capacity are only replaced once realloc has succeeded, so a
//    failed call leaves the previous buffer usable for later, shorter requests.

char *Numbered_Suffix(char *left, int num, char *right)
{ static char *suffix = NULL;
  static int   max = -1;
  int len;

  if (left == NULL || right == NULL)
    return (NULL);
  len =  strlen(left);
  len += strlen(right) + 40;
  if (len > max)
    { int   nmax = ((int) (1.2*len)) + 100;      //  Grow with slack to limit reallocations
      char *nsuf = (char *) realloc(suffix,nmax+1);

      if (nsuf == NULL)
        { fprintf(stderr,"%s: Out of memory (Making number suffix for %d)\n",Prog_Name,num);
          return (NULL);
        }
      suffix = nsuf;
      max    = nmax;
    }
  sprintf(suffix,"%s%d%s",left,num,right);
  return (suffix);
}


#define  COMMA  ','

//  Print num on out with a COMMA between 3-digit groups for readability.  At most three
//    groups are split off, so the leading field holds whatever lies above the billions.
//    Values below 1000 (negative ones included) are printed as they are.
//    A non-zero width is a minimum field width for the whole number: each group uses up 4
//    columns of it and what remains pads the leading digits.

void Print_Number(int64 num, int width, FILE *out)
{ static const int64 scale[4] = { 1ll, 1000ll, 1000000ll, 1000000000ll };
  int groups, pad, k;

  groups = 0;                              //  Number of ",ddd" groups after the lead digits
  while (groups < 3 && num >= scale[groups+1])
    groups += 1;

  if (groups == 0)                         //  Small numbers get the whole width
    pad = width;
  else if (width > 4*groups)
    pad = width - 4*groups;
  else                                     //  Field too narrow (or no width): no padding
    pad = 0;

  fprintf(out,"%*lld",pad,num/scale[groups]);
  for (k = groups-1; k >= 0; k--)
    fprintf(out,"%c%03lld",COMMA,(num/scale[k])%1000ll);
}

//  Return the number of digits, base 10, of num

//  Count how many times num can be divided by 10 while it is still at least 1, i.e. the
//    number of decimal digits of a positive num.  Zero and negative values give 0.

int  Number_Digits(int64 num)
{ int count;

  for (count = 0; num >= 1; num /= 10)
    count += 1;
  return (count);
}


/*******************************************************************************************
 *
 *  READ COMPRESSION/DECOMPRESSION UTILITIES
 *
 ********************************************************************************************/

//  Compress read into 2-bits per base (from [0-3] per byte representation)

//  Pack the len codes in s[0..len-1] in place, four per byte with the first code in the top
//    two bits.  s[len], s[len+1] and s[len+2] are zeroed first so that the final, partial
//    byte is padded with zeros; s[len+1] and s[len+2] are put back afterwards, s[len] is
//    left at 0.  The buffer must therefore extend to at least s[len+2].

void Compress_Read(int len, char *s)
{ char *out = s;
  char  keep1, keep2;
  int   k, packed;

  keep1 = s[len+1];
  keep2 = s[len+2];
  s[len] = s[len+1] = s[len+2] = 0;

  for (k = 0; k < len; k += 4)
    { packed = (s[k] << 6) | (s[k+1] << 4) | (s[k+2] << 2) | s[k+3];
      *out++ = (char ) packed;             //  out never overtakes the codes still to pack
    }

  s[len+1] = keep1;
  s[len+2] = keep2;
}

//  Uncompress read from 2-bits per base into [0-3] per byte representation

//  Expand, in place, the packed bytes at the start of s into one code per byte and store
//    the terminator 4 in s[len].  Bytes are expanded from the last one back to the first so
//    that no packed byte is overwritten before it has been read.  Whole bytes are always
//    expanded, so up to s[4*((len-1)/4)+3] is written (and one byte even when len is 0).

void Uncompress_Read(int len, char *s)
{ int   k, packed;
  char *quad;

  for (k = (len-1)/4; k >= 0; k--)
    { packed  = s[k];
      quad    = s + 4*k;
      quad[0] = (char) ((packed >> 6) & 0x3);
      quad[1] = (char) ((packed >> 4) & 0x3);
      quad[2] = (char) ((packed >> 2) & 0x3);
      quad[3] = (char) (packed & 0x3);
    }
  s[len] = 4;
}

//  Convert read in [0-3] representation to ascii representation (end with '\0')

//  Rewrite, in place, the codes 0,1,2,3 of s as 'a','c','g','t' up to the terminating 4,
//    which is replaced by '\0'.

void Lower_Read(char *s)
{ static const char *letter = "acgt";

  while (*s != 4)
    { *s = letter[(int) *s];
      s += 1;
    }
  *s = '\0';
}

//  Rewrite, in place, the codes 0,1,2,3 of s as 'A','C','G','T' up to the terminating 4,
//    which is replaced by '\0'.

void Upper_Read(char *s)
{ static const char *letter = "ACGT";

  while (*s != 4)
    { *s = letter[(int) *s];
      s += 1;
    }
  *s = '\0';
}

//  Convert read in ascii representation to [0-3] representation (end with 4)

//  Rewrite, in place, the '\0'-terminated ASCII string s as codes: 'c'/'C' -> 1,
//    'g'/'G' -> 2, 't'/'T' -> 3 and every other character, 'a'/'A' included, -> 0.  The
//    terminating '\0' is replaced by 4.
//  Each character is read as an unsigned byte and anything outside the 128-entry table maps
//    to 0, so bytes with the high bit set can no longer index outside the table.

void Number_Read(char *s)
{ static char number[128] =
    { 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 1, 0, 0, 0, 2,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 3, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 1, 0, 0, 0, 2,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 3, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
    };
  unsigned char c;

  for ( ; *s != '\0'; s++)
    { c = (unsigned char) *s;
      if (c < 128)
        *s = number[c];
      else
        *s = 0;
    }
  *s = 4;
}


/*******************************************************************************************
 *
 *  DB OPEN, TRIM & CLOSE ROUTINES
 *
 ********************************************************************************************/


// Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has
//   a part # in it then just the part is opened.  The index array is allocated (for all or
//   just the part) and read in.
// Return status of routine:
//    -1: The DB could not be opened for a reason reported by the routine to stderr
//     0: Open of DB proceeded without mishap
//     1: Open of DAM proceeded without mishap

// Implementation notes for Open_DB:
//   * The stub file is found by stripping a ".dam" or ".db" extension and then an optional
//       trailing ".<part>" block number from 'path'; <root>.db is tried before <root>.dam.
//   * The HITS_DB header is read from the .idx file directly into *db, after which the stub
//       is scanned for the partition parameters and, if a block was requested, its bounds.
//   * Running out of memory while building the record returns -1 (the allocator has already
//       reported it on stderr).  Short or malformed reads go through SYSTEM_ERROR, and
//       failing to build the stub file name still calls exit(1).
//   * 'path' is only inspected after checking that it holds at least 4 characters before
//       looking for a ".dam" extension.

int Open_DB(char* path, HITS_DB *db)
{ char *root, *pwd, *bptr, *fptr, *cat;
  int   nreads;
  FILE *index, *dbvis;
  int   status, plen, isdam;
  int   part, cutoff, all;
  int   ofirst, bfirst, olast;

  plen = strlen(path);
  if (plen >= 4 && strcmp(path+(plen-4),".dam") == 0)
    root = Root(path,".dam");
  else
    root = Root(path,".db");
  pwd = PathTo(path);
  if (root == NULL || pwd == NULL)         //  Out of memory, already reported
    { free(pwd);
      free(root);
      return (-1);
    }

  //  A purely numeric, non-zero last extension of the root selects a block and is cut off

  part = 0;
  bptr = strrchr(root,'.');
  if (bptr != NULL && bptr[1] != '\0' && bptr[1] != '-')
    { part = strtol(bptr+1,&fptr,10);
      if (*fptr != '\0' || part == 0)
        part = 0;
      else
        *bptr = '\0';
    }

  isdam = 0;
  cat = Catenate(pwd,"/",root,".db");
  if (cat == NULL)
    exit (1);
  if ((dbvis = fopen(cat,"r")) == NULL)
    { cat = Catenate(pwd,"/",root,".dam");
      if (cat == NULL)
        exit (1);
      if ((dbvis = fopen(cat,"r")) == NULL)
        { status = -1;
          fprintf(stderr,"%s: Could not open database %s\n",Prog_Name,path);
          goto exit;
        }
      isdam = 1;
    }

  if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"rm")) == NULL)
    { status = -1;
      goto exit1;
    }
  if (fread(db,sizeof(HITS_DB),1,index) != 1)
    SYSTEM_ERROR
  nreads = db->oreads;

  { int   p, nblocks, nfiles, blast;
    int64 size;
    char  fname[MAX_NAME], prolog[MAX_NAME];

    //  Skip over the file list of the stub, then look for the partition section

    nblocks = 0;
    if (fscanf(dbvis,DB_NFILE,&nfiles) != 1)
      SYSTEM_ERROR
    for (p = 0; p < nfiles; p++)
      if (fscanf(dbvis,DB_FDATA,&blast,fname,prolog) != 3)
        SYSTEM_ERROR
    if (fscanf(dbvis,DB_NBLOCK,&nblocks) != 1)
      { if (part == 0)                     //  Not partitioned: defaults for the whole DB
          { cutoff = 0;
            all    = 1;
          }
        else
          { fprintf(stderr,"%s: DB %s has not yet been partitioned, cannot request a block !\n",
                           Prog_Name,root);
            status = -1;
            goto exit2;
          }
      }
    else
      { if (fscanf(dbvis,DB_PARAMS,&size,&cutoff,&all) != 3)
          SYSTEM_ERROR
        if (part > nblocks)
          { fprintf(stderr,"%s: DB %s has only %d blocks\n",Prog_Name,root,nblocks);
            status = -1;
            goto exit2;
          }
      }

    //  The part'th block record gives the first read of the block, the next one its end

    if (part > 0)
      { for (p = 1; p <= part; p++)
          if (fscanf(dbvis,DB_BDATA,&ofirst,&bfirst) != 2)
            SYSTEM_ERROR
        if (fscanf(dbvis,DB_BDATA,&olast,&blast) != 2)
          SYSTEM_ERROR
      }
    else
      { ofirst = bfirst = 0;
        olast  = nreads;
      }
  }

  db->trimmed = 0;
  db->tracks  = NULL;
  db->part    = part;
  db->cutoff  = cutoff;
  db->all     = all;
  db->ofirst  = ofirst;
  db->bfirst  = bfirst;

  if (part <= 0)
    { db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+1),"Allocating Open_DB index");
      if (db->reads == NULL)
        { status = -1;
          goto exit2;
        }
      if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
        SYSTEM_ERROR
    }
  else
    { HITS_READ *reads;
      int        i, r, maxlen;
      int64      totlen;

      nreads = olast-ofirst;
      reads  = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+1),"Allocating Open_DB index");
      if (reads == NULL)
        { db->reads = NULL;
          status = -1;
          goto exit2;
        }

      fseeko(index,sizeof(HITS_READ)*ofirst,SEEK_CUR);
      if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
        SYSTEM_ERROR

      //  The header totals describe the whole DB, so recompute them for just this block

      totlen = 0;
      maxlen = 0;
      for (i = 0; i < nreads; i++)
        { r = reads[i].rlen;
          totlen += r;
          if (r > maxlen)
            maxlen = r;
        }

      db->maxlen = maxlen;
      db->totlen = totlen;
      db->reads  = reads;
    }

  db->nreads = nreads;
  db->bases  = NULL;
  db->loaded = 0;
  db->path   = Strdup(Catenate(pwd,PATHSEP,root,""),"Allocating Open_DB path");
  if (db->path == NULL)                    //  Do not hand back a half-built record
    { free(db->reads);
      db->reads = NULL;
      status = -1;
      goto exit2;
    }

  status = isdam;

exit2:
  fclose(index);
exit1:
  fclose(dbvis);
exit:
  free(pwd);
  free(root);

  return (status);
}


// Trim the DB or part thereof and all loaded tracks according to the cutoff and all settings
//   of the current DB partition.  Reallocate smaller memory blocks for the information kept
//   for the retained reads.

// Implementation notes for Trim_DB:
//   * Nothing is done if the db is already trimmed, or if cutoff <= 0 and all is set.
//   * A read is kept when its length is at least cutoff and, unless all is set, its DB_BEST
//       flag is on.
//   * Every loaded track is compacted in place with the same test: a track without a data
//       block has 'size'-byte entries moved down; a track with one has its offset table
//       (4- or 8-byte entries) rebuilt while the kept data segments are slid down.
//   * Shrinking is best effort: a block is only replaced when Realloc succeeds, and a block
//       that would shrink to 0 bytes is left as it is, so no track ever loses its anno or
//       data pointer here.

void Trim_DB(HITS_DB *db)
{ int         i, j, r;
  int         allflag, cutoff;
  int64       totlen;
  int         maxlen, nreads;
  HITS_TRACK *record;
  HITS_READ  *reads;

  if (db->trimmed) return;

  if (db->cutoff <= 0 && db->all) return;

  cutoff = db->cutoff;
  if (db->all)
    allflag = 0;
  else
    allflag = DB_BEST;

  reads  = db->reads;
  nreads = db->nreads;

  for (record = db->tracks; record != NULL; record = record->next)
    { int   *anno4, size;
      int64 *anno8;
      char  *anno, *data;
      int64  asize, dsize;                 //  Bytes still in use in anno and data
      void  *shrunk;

      size  = record->size;
      data  = (char *) record->data;
      dsize = 0;
      if (data == NULL)
        { anno = (char *) record->anno;
          j = 0;                           //  Here j is a byte offset, not an entry count
          for (i = r = 0; i < nreads; i++, r += size)
            if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
              { memmove(anno+j,anno+r,size);
                j += size;
              }
          memmove(anno+j,anno+r,size);     //  Carry the extra trailing entry along
          asize = ((int64) j) + size;
        }
      else if (size == 4)
        { int ai;

          anno4 = (int *) (record->anno);
          j = anno4[0] = 0;
          for (i = 0; i < nreads; i++)
            if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
              { ai = anno4[i];
                anno4[j+1] = anno4[j] + (anno4[i+1]-ai);
                memmove(data+anno4[j],data+ai,anno4[i+1]-ai);
                j += 1;
              }
          dsize = anno4[j];
          asize = ((int64) size)*(j+1);
        }
      else // size == 8
        { int64 ai;

          anno8 = (int64 *) (record->anno);
          j = anno8[0] = 0;
          for (i = 0; i < nreads; i++)
            if ((reads[i].flags & DB_BEST) >= allflag && reads[i].rlen >= cutoff)
              { ai = anno8[i];
                anno8[j+1] = anno8[j] + (anno8[i+1]-ai);
                memmove(data+anno8[j],data+ai,anno8[i+1]-ai);
                j += 1;
              }
          dsize = anno8[j];
          asize = ((int64) size)*(j+1);
        }

      if (dsize > 0)
        { shrunk = Realloc(record->data,dsize,NULL);
          if (shrunk != NULL)
            record->data = shrunk;
        }
      if (asize > 0)
        { shrunk = Realloc(record->anno,asize,NULL);
          if (shrunk != NULL)
            record->anno = shrunk;
        }
    }

  totlen = maxlen = 0;
  for (j = i = 0; i < nreads; i++)
    { r = reads[i].rlen;
      if ((reads[i].flags & DB_BEST) >= allflag && r >= cutoff)
        { totlen += r;
          if (r > maxlen)
            maxlen = r;
          reads[j++] = reads[i];
        }
    }

  db->totlen  = totlen;
  db->maxlen  = maxlen;
  db->nreads  = j;
  db->trimmed = 1;

  if (j < nreads)
    { HITS_READ *fewer;

      fewer = (HITS_READ *) Realloc(reads,sizeof(HITS_READ)*(j+1),NULL);
      if (fewer != NULL)
        db->reads = fewer;
    }
}

// Shut down an open 'db' by freeing all associated space, including tracks and QV structures, 
//   and any open file pointers.  The record pointed at by db however remains (the user
//   supplied it and so should free it).

// Implementation notes for Close_DB:
//   * When the sequences were block loaded, db->bases points one byte past the start of
//       the allocation made by Read_All_Sequences, hence the -1 before freeing.  Otherwise
//       db->bases, if set, is the open stream of the bases file and is closed.
//   * Each track's name is released together with its anno and data blocks, as Close_Track
//       does.  NOTE(review): this assumes every track name is heap allocated, which holds
//       for tracks made by Load_Track -- confirm for tracks created elsewhere.
//   * The fields of *db are not reset, so the record must not be used or closed again
//       without being reopened.

void Close_DB(HITS_DB *db)
{ HITS_TRACK *t, *p;

  if (db->loaded)
    free(((char *) (db->bases)) - 1);
  else if (db->bases != NULL)
    fclose((FILE *) db->bases);
  free(db->reads);
  free(db->path);

  for (t = db->tracks; t != NULL; t = p)
    { p = t->next;                         //  Fetch the link before the record is freed
      free(t->anno);
      free(t->data);
      free(t->name);
      free(t);
    }
}

/*******************************************************************************************
 *
 *  TRACK LOAD & CLOSE ROUTINES
 *
 ********************************************************************************************/

//  Return status of track:
//     1: Track is for trimmed DB
//     0: Track is for untrimmed DB
//    -1: Track is not the right size of DB either trimmed or untrimmed
//    -2: Could not find the track 

//  Open <db->path>.<track>.anno, read the read count stored in its first int, and classify
//    it against the db: equal to db->breads gives 1, else equal to db->oreads gives 0, else
//    -1.  A file that cannot be opened gives -2; a short read goes through SYSTEM_ERROR.

int Check_Track(HITS_DB *db, char *track)
{ FILE *anno;
  int   count;

  anno = fopen(Catenate(db->path,".",track,".anno"),"r");
  if (anno == NULL)
    return (-2);

  if (fread(&count,sizeof(int),1,anno) != 1)
    SYSTEM_ERROR

  fclose(anno);

  if (count == db->breads)
    return (1);
  if (count == db->oreads)
    return (0);
  return (-1);
}

// If track is not already in the db's track list, then allocate all the storage for it,
//   read it in from the appropriate file, add it to the track list, and return a pointer
//   to the newly created HITS_TRACK record.  If the track does not exist or cannot be
//   opened for some reason, then NULL is returned.

// Implementation notes for Load_Track:
//   * The .anno file starts with two ints: the number of reads the track was built for and
//       the size in bytes of one annotation entry.  The count must equal db->breads for a
//       trimmed db and db->oreads otherwise, else the program exits.
//   * When a .data file exists the entries are 4- or 8-byte offsets into it and one more
//       entry than there are reads is read; the offsets are rebased so that the first is 0
//       and only the bytes they span are loaded.
//   * A non-positive entry size or a short read goes through SYSTEM_ERROR.  Any allocation
//       failure is reported and then exits with status 1 rather than being dereferenced.
//   * Sizes and seek offsets are computed in 64 bits, and at least one byte is always
//       requested for the data block so that an empty block is not mistaken for an
//       allocation failure.
//   * The new record goes to the front of db->tracks, or second if the first track is
//       named ".@qvs".

HITS_TRACK *Load_Track(HITS_DB *db, char *track)
{ FILE       *afile, *dfile;
  int         tracklen, size;
  int         nreads, expect, first;
  void       *anno;
  void       *data;
  HITS_TRACK *record;

  if (track[0] == '.')
    { fprintf(stderr,"Track names cannot begin with a .\n");
      exit (1);
    }

  for (record = db->tracks; record != NULL; record = record->next)
    if (strcmp(record->name,track) == 0)
      return (record);

  afile = fopen(Catenate(db->path,".",track,".anno"),"r");
  if (afile == NULL)
    return (NULL);
  dfile = fopen(Catenate(db->path,".",track,".data"),"r");

  if (fread(&tracklen,sizeof(int),1,afile) != 1)
    SYSTEM_ERROR
  if (fread(&size,sizeof(int),1,afile) != 1)
    SYSTEM_ERROR

  if (db->trimmed)
    { expect = db->breads;
      first  = db->bfirst;
    }
  else
    { expect = db->oreads;
      first  = db->ofirst;
    }
  if (tracklen != expect)
    { fprintf(stderr,"%s: Track %s not same size as database !\n",Prog_Name,track);
      exit (1);
    }
  if (size <= 0)
    SYSTEM_ERROR
  if (db->part > 0)                        //  Skip the entries of the preceding blocks
    fseeko(afile,((int64) size)*first,SEEK_CUR);
  nreads = db->nreads;

  anno = (void *) Malloc(((int64) size)*(nreads+1),"Allocating Track Anno Vector");
  if (anno == NULL)
    exit (1);

  if (dfile == NULL)
    { if (fread(anno,size,nreads,afile) != (size_t) nreads)
        SYSTEM_ERROR
    }
  else
    { if (fread(anno,size,nreads+1,afile) != (size_t) (nreads+1))
        SYSTEM_ERROR
    }

  if (dfile != NULL)
    { int64 *anno8, off8, dlen;
      int   *anno4, off4;
      int    i;

      if (size == 4)
        { anno4 = (int *) anno;
          off4  = anno4[0];
          if (off4 != 0)
            { for (i = 0; i <= nreads; i++)
                anno4[i] -= off4;
              fseeko(dfile,off4,SEEK_SET);
            }
          dlen = anno4[nreads];
        }
      else
        { anno8 = (int64 *) anno;
          off8  = anno8[0];
          if (off8 != 0)
            { for (i = 0; i <= nreads; i++)
                anno8[i] -= off8;
              fseeko(dfile,off8,SEEK_SET);
            }
          dlen = anno8[nreads];
        }

      data = (void *) Malloc((dlen > 0) ? dlen : 1,"Allocating Track Data Vector");
      if (data == NULL)
        exit (1);
      if (dlen > 0)
        { if (fread(data,dlen,1,dfile) != 1)
            SYSTEM_ERROR
        }
      fclose(dfile);
    }
  else
    data = NULL;

  fclose(afile);

  record = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating Track Record");
  if (record == NULL)
    exit (1);
  record->name = Strdup(track,"Allocating Track Name");
  if (record->name == NULL)
    exit (1);
  record->data = data;
  record->anno = anno;
  record->size = size;

  if (db->tracks != NULL && strcmp(db->tracks->name,".@qvs") == 0)
    { record->next     = db->tracks->next;
      db->tracks->next = record;
    }
  else
    { record->next = db->tracks;
      db->tracks   = record;
    }

  return (record);
}

//  Find the first track of db named 'track', release its anno, data and name blocks,
//    unlink it from db->tracks and free the record.  Nothing happens if no track has that
//    name.

void Close_Track(HITS_DB *db, char *track)
{ HITS_TRACK **link, *victim;

  //  link always addresses the pointer that leads to the track being examined

  for (link = &(db->tracks); *link != NULL; link = &((*link)->next))
    { victim = *link;
      if (strcmp(victim->name,track) != 0)
        continue;
      free(victim->anno);
      free(victim->data);
      free(victim->name);
      *link = victim->next;
      free(victim);
      return;
    }
}


/*******************************************************************************************
 *
 *  READ BUFFER ALLOCATION AND READ ACCESS
 *
 ********************************************************************************************/

// Allocate and return a buffer big enough for the largest read in 'db', leaving room
//   for an initial delimiter character

//  Allocate db->maxlen+4 bytes and return a pointer to the second byte, so that the byte in
//    front of the returned buffer is available for a delimiter.  The program exits with
//    status 1 if the allocation fails.  To release the buffer, free the returned pointer
//    minus one.

char *New_Read_Buffer(HITS_DB *db)
{ char *block;

  block = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer");
  if (block == NULL)
    exit (1);
  block += 1;
  return (block);
}

// Load into 'read' the i'th read in 'db'.  As an upper case ASCII string if ascii is 2, as a
//   lower-case ASCII string if ascii is 1, and as a numeric string over 0(A), 1(C), 2(G), and
//   3(T) otherwise.
//
// **NB**, the byte before read will be set to a delimiter character!

// Implementation notes for Load_Read:
//   * The bases file <db->path>.bps is opened on first use and kept open in db->bases.
//   * An index outside [0,db->nreads) is reported and ends the program with status 1;
//       negative indices are now rejected too.
//   * The compressed bytes are read straight into 'read' and expanded in place, so 'read'
//       should come from New_Read_Buffer.
//   * The byte before 'read' is set to '\0' for the ASCII forms and to 4 for the numeric
//       form, mirroring the terminator at the end.

void Load_Read(HITS_DB *db, int i, char *read, int ascii)
{ FILE      *bases  = (FILE *) db->bases;
  int64      off;
  int        len, clen;
  HITS_READ *r = db->reads;

  if (bases == NULL)
    { db->bases = (void *) (bases = Fopen(Catenate(db->path,"","",".bps"),"r"));
      if (bases == NULL)
        exit (1);
    }
  if (i < 0 || i >= db->nreads)
    { fprintf(stderr,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
      exit (1);
    }

  off = r[i].boff;
  len = r[i].rlen;

  if (ftello(bases) != off)                //  Only seek when not already positioned there
    fseeko(bases,off,SEEK_SET);
  clen = COMPRESSED_LEN(len);
  if (clen > 0)
    { if (fread(read,clen,1,bases) != 1)
        SYSTEM_ERROR
    }
  Uncompress_Read(len,read);
  if (ascii == 1)
    { Lower_Read(read);
      read[-1] = '\0';
    }
  else if (ascii == 2)
    { Upper_Read(read);
      read[-1] = '\0';
    }
  else
    read[-1] = 4;
}

//  Load bases [beg,end) of the i'th read of db into the buffer 'read' and return a pointer
//    to the first requested base, which lies beg%4 bytes into 'read' because whole
//    compressed bytes are fetched and expanded.  'ascii' selects the form as for Load_Read,
//    and the byte before the returned pointer is set to the matching delimiter.
//    The caller must ensure 0 <= beg <= end <= length of the read; that is not checked.
//    An index outside [0,db->nreads) is reported and ends the program with status 1.
//  The expansion is asked to terminate at the end of the requested range instead of after
//    the last whole byte: the same bytes are expanded, but nothing is written beyond them,
//    which could previously fall one byte past a buffer made by New_Read_Buffer.

char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
{ FILE      *bases  = (FILE *) db->bases;
  int64      off;
  int        len, clen;
  int        bbeg, bend;
  HITS_READ *r = db->reads;

  if (bases == NULL)
    { db->bases = (void *) (bases = Fopen(Catenate(db->path,"","",".bps"),"rm"));
      if (bases == NULL)
        exit (1);
    }
  if (i < 0 || i >= db->nreads)
    { fprintf(stderr,"%s: Index out of bounds (Load_Subread)\n",Prog_Name);
      exit (1);
    }

  bbeg = beg/4;                            //  First compressed byte holding a wanted base
  bend = (end-1)/4+1;                      //  One past the last such byte

  off = r[i].boff + bbeg;
  len = end - beg;

  if (ftello(bases) != off)
    fseeko(bases,off,SEEK_SET);
  clen = bend-bbeg;
  if (clen > 0)
    { if (fread(read,clen,1,bases) != 1)
        SYSTEM_ERROR
    }
  Uncompress_Read(end-4*bbeg,read);        //  Terminator lands on read[beg%4+len]
  read += beg%4;
  read[len] = 4;
  if (ascii == 1)
    { Lower_Read(read);
      read[-1] = '\0';
    }
  else if (ascii == 2)
    { Upper_Read(read);
      read[-1] = '\0';
    }
  else
    read[-1] = 4;

  return (read);
}


/*******************************************************************************************
 *
 *  BLOCK LOAD OF ALL READS (PRIMARILY FOR DALIGNER)
 *
 ********************************************************************************************/

// Allocate a block big enough for all the uncompressed sequences, read them into it,
//   reset the 'off' in each read record to be its in-memory offset, and set the
//   bases pointer to point at the block after closing the bases file.  If ascii is
//   non-zero then the reads are converted to ACGT ascii, otherwise the reads are left
//   as numeric strings over 0(A), 1(C), 2(G), and 3(T).

// Implementation notes for Read_All_Sequences:
//   * The block holds, after one leading delimiter byte (4), every read followed by its
//       terminator; db->bases is set to the byte after that leading delimiter, and the
//       entry one past the last read gets the offset just after the final terminator.
//   * ascii == 1 gives lower case, any other non-zero value upper case.
//   * Nothing is done if the sequences are already loaded, since db->bases is then a
//       memory block and no longer a stream.
//   * Failure to open the bases file or to allocate the block ends the program with
//       status 1 (the cause has been reported by Fopen/Malloc).

void Read_All_Sequences(HITS_DB *db, int ascii)
{ FILE      *bases;
  int        nreads = db->nreads;
  HITS_READ *reads = db->reads;
  void     (*translate)(char *s);

  char  *seq;
  int64  o, off;
  int    i, len, clen;

  if (db->loaded)
    return;

  bases = (FILE *) db->bases;
  if (bases == NULL)
    { db->bases = (void *) (bases = Fopen(Catenate(db->path,"","",".bps"),"r"));
      if (bases == NULL)
        exit (1);
    }
  else
    rewind(bases);

  seq = (char *) Malloc(db->totlen+nreads+4,"Allocating All Sequence Reads");
  if (seq == NULL)
    exit (1);

  *seq++ = 4;

  if (ascii == 1)
    translate = Lower_Read;
  else
    translate = Upper_Read;

  o = 0;
  for (i = 0; i < nreads; i++)
    { len = reads[i].rlen;
      off = reads[i].boff;
      if (ftello(bases) != off)
        fseeko(bases,off,SEEK_SET);
      clen = COMPRESSED_LEN(len);
      if (clen > 0)
        { if (fread(seq+o,clen,1,bases) != 1)
            SYSTEM_ERROR
        }
      Uncompress_Read(len,seq+o);
      if (ascii)
        translate(seq+o);
      reads[i].boff = o;                   //  From here on boff is an offset into the block
      o += (len+1);
    }
  reads[nreads].boff = o;

  fclose(bases);

  db->bases  = (void *) seq;
  db->loaded = 1;
}

//  Call 'foreach' once for every file that belongs to the database named by 'path': first
//    for the stub file itself (<root>.db or <root>.dam, with extension "db" or "dam"), then
//    for every file of the directory whose name is <root>.<extension> (preceded by a '.'
//    when built with HIDE_FILES).  Each call receives the file's path and its extension;
//    the path is held in Catenate's static buffer and is only valid during the call.
//    If the directory holds the stub only under a differently cased name, that casing is
//    adopted for the root.
//    Returns 0 on success and 1 if 'path' is NULL, memory runs out, the directory cannot be
//    read, or no stub file is found.
//  A ".dam" extension on 'path' is stripped as well as ".db", as Open_DB does, and the root
//    is only measured once it is known to have been allocated.

int List_DB_Files(char *path, void foreach(char *path, char *extension))
{ int            status, plen, rlen, dlen;
  char          *root, *pwd, *name;
  int            isdam;
  DIR           *dirp;
  struct dirent *dp;

  if (path == NULL)
    return (1);

  status = 0;
  pwd    = PathTo(path);
  plen   = strlen(path);
  if (plen >= 4 && strcmp(path+(plen-4),".dam") == 0)
    root = Root(path,".dam");
  else
    root = Root(path,".db");

  if (root == NULL || pwd == NULL)
    { status = 1;
      goto exit;
    }
  rlen = strlen(root);

  if ((dirp = opendir(pwd)) == NULL)
    { status = 1;
      goto exit;
    }

  isdam = 0;
  while ((dp = readdir(dirp)) != NULL)     //   Get case dependent root name (if necessary)
    { name = dp->d_name;
      if (strcmp(name,Catenate("","",root,".db")) == 0)
        break;
      if (strcmp(name,Catenate("","",root,".dam")) == 0)
        { isdam = 1;
          break;
        }
      if (strcasecmp(name,Catenate("","",root,".db")) == 0)
        { memcpy(root,name,rlen);          //   Same length, so the terminator stays put
          break;
        }
      if (strcasecmp(name,Catenate("","",root,".dam")) == 0)
        { memcpy(root,name,rlen);
          isdam = 1;
          break;
        }
    }
  if (dp == NULL)
    { status = 1;
      closedir(dirp);
      goto exit;
    }

  if (isdam)
    foreach(Catenate(pwd,"/",root,".dam"),"dam");
  else
    foreach(Catenate(pwd,"/",root,".db"),"db");

  rewinddir(dirp);                         //   Report each auxiliary file
  while ((dp = readdir(dirp)) != NULL)
    { name = dp->d_name;
      dlen = strlen(name);
#ifdef HIDE_FILES
      if (name[0] != '.')
        continue;
      dlen -= 1;
      name += 1;
#endif
      if (dlen < rlen+1)
        continue;
      if (name[rlen] != '.')
        continue;
      if (strncmp(name,root,rlen) != 0)
        continue;
      foreach(Catenate(pwd,PATHSEP,name,""),name+(rlen+1));
    }
  closedir(dirp);

exit:
  free(pwd);
  free(root);
  return (status);
}

//  Print the read s on stdout, starting a new line after every 'width' symbols and ending
//    with a newline.  A read whose first byte is at most 4 is taken to be numeric (codes
//    ended by 4) and printed as digits; any other read is taken to be a '\0'-terminated
//    ASCII string and printed as characters.
//    A width of 0 means no line breaks (it previously divided by zero), and an empty
//    numeric read, whose first byte is the terminator 4, prints an empty line instead of
//    being scanned as ASCII.
//    An empty ASCII string cannot be told apart from a numeric read starting with code 0
//    and must not be passed.

void Print_Read(char *s, int width)
{ int i;

  if (s[0] <= 4)
    { for (i = 0; s[i] != 4; i++)
        { if (width != 0 && i != 0 && i%width == 0)
            printf("\n");
          printf("%d",s[i]);
        }
      printf("\n");
    }
  else
    { for (i = 0; s[i] != '\0'; i++)
        { if (width != 0 && i != 0 && i%width == 0)
            printf("\n");
          printf("%c",s[i]);
        }
      printf("\n");
    }
}
