不错的流式压缩算法

类别:编程语言 点击:0 评论:0 推荐:

//#include "vec.h"
#include "def.h"

#include <stdlib.h>
#include <memory.h>
#include <math.h>

#define NEW_PUTNBITS /* putnbits() puts multiple bits at a time, instead of multiple calls to putbit() for each bit */

#ifdef DEB_malloc
 void *malloc(size_t tSize)
 {
  void *vpTmp = NULL;
  
  if(((int)tSize) <= 0)
  {
   fprintf(stderr, "Trying to allocate %d bytes, but thats Nonsense...", (int)tSize);
   fflush(NULL);
   return NULL;
  }
  
  vpTmp = malloc(tSize);
  if(vpTmp == NULL)
  {
   fprintf(stderr, "Trying to allocate %d bytes, but mem is not available...", (int)tSize);
   fflush(NULL);
  }

  return(vpTmp);
 }
#endif

/* Faster way of computing floor log2 */
/* logn = (int) floor(L2 ((double) n)); */
int getlog2(unsigned int n)
{
 unsigned int tmp; 
 /* int logn = 31; */
 int logn = (sizeof(unsigned int)*8) - 1;
 
 /*printf("n: %i ", n);*/ /* debug */

 /* if most numbers will be small (0-255), this will save some time */
 if((n & 0xFF) == n)
 {
  logn = 7;
  n = n << ((sizeof(unsigned int)*8) - 8);
 }
 
 for(; logn > 0; logn--) /* find position of leftmost 1 of n */
 {
  tmp = n << 1;
  tmp = tmp >> 1; /* clear MSB */
  if(tmp == n) /* if top bit is not a 1 */
   n = n << 1;
  else
   break;
 }
 /*printf("logn: %i\n",logn);*/ /* debug */
 return logn;
}

/* Returns a pointer to the inverted file vector 
   The vector is read in from the vector file   
    *don't call initvec before this function, but initialise the
  new vector directly e.g. BITVEC vec={NUL,0,0,0,0,0,0}
    *resetvec will be called automatically after loading)
*/
int loadvector(long fileoffset, FILE *fpVectors, BITVEC *vp)
{
 if (fseek(fpVectors, fileoffset, SEEK_SET) != -1)
  return vbytereadvec(vp, fpVectors);
 else
  return -1;
}

/* Return length of vector */
int veclen(BITVEC *vp)
{
  return(vp->len);
}

/* Before a vector can be read (when loaded from file), it must be reset */
void resetvec(BITVEC *vp)
{
  vp->last = -1;
  vp->pos = vp->bit = 0;
  vp->cur = vp->vector[0];
}

void freevec(BITVEC *vp)
{
 if(vp != NULL)
 {
  free(vp->vector);
  vp->vector = NULL;
 }

 vp->size = 0;
  vp->last = -1;
  vp->bit = vp->pos = vp->len = vp->cur = 0;
}

/* Initialising a vector before insertion of data */
void initvec(BITVEC *vp, int initveclen)
{
  vp->vector = malloc(initveclen * sizeof(char));
  vp->size = initveclen;
  vp->last = -1;
  vp->bit = vp->pos = vp->len = vp->cur = 0;
}

/* Write the contents of a vector to file.  This version does not use
   vbtyewrite for the vector length, so if the vector is to be opened
  with loadvector, use vbytedumpvec (below) instead.
   Assumes that padvec has been called. */
void dumpvec(BITVEC *vp, FILE *fp)
{
#ifdef DEBUG
  int i;

  resetvec(vp);
  fprintf(stderr,"[", i);
  while( (i=getrunlen(vp)) >=0 )
    fprintf(stderr," %d", i);

  fprintf(stderr,"] {", i);
  resetvec(vp);
 
  while( (i=getdelta(vp)) >=0 )
   fprintf(stderr," %d", i);

  fprintf(stderr,"}", i);
#endif  /* DEBUG */

  fwrite(&vp->len, sizeof(int), 1, fp);
  fwrite(vp->vector, sizeof(char), vp->len/8 + 1, fp);
}

/* This is a different version of dumpvec that uses vbyteread to
   get the vector length... this is the operation that is 'undone'
  in vbytereadvec (which is called by loadvector)...
  Assumes that padvec has been called. */
int vbytedumpvec(BITVEC *vp, FILE *fp)
{
  vbytewrite(vp->len, fp);
  return(fwrite(vp->vector, sizeof(char), vp->len/8 + 1, fp));
}

/* Read a vector back from file */
/* Returns -1 on error */
int vbytereadvec(BITVEC *vp, FILE *fp)
{
  int len;

  if ((vp->len = vbyteread(fp)) == -1)
  {
    fprintf(stderr, "vbytereadvec: Error reading vbyte coded vector length %d\n", vp->len);
  return(-1);
  } 
  vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */
  len = vp->len/8 + 1;
  vp->vector = malloc(vp->size * sizeof(char));
  if (fread(vp->vector, sizeof(char), len, fp) <= 0)
  {
    fprintf(stderr, "vbytereadvec: Error reading vector (length: %d)\n", len);
  return(-1);
 }   

  resetvec(vp);
  return(len);
}

/* Read a vector back from file */
/* Returns -1 on error */
int readvec(BITVEC *vp, FILE *fp)
{
  int  len;

  fread(&vp->len, sizeof(int), 1, fp);
  vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */
  len = vp->len/8 + 1;
  vp->vector = malloc(vp->size * sizeof(char));

 if(vp->vector != NULL)
 {
  fread(vp->vector, sizeof(char), len, fp);
  resetvec(vp);
 }
 else
 {
  printf("Can not allocate %i bytes...:(\n", vp->size * sizeof(char));
  return -1;
 }

  return(0);
}
/* Read a vector back from file -- used in cafezipread.c */
/* Returns -1 on error */
int readzipvec(BITVEC *vp, FILE *fp)
{
  int  len;

 len = vbyteread(fp);
 initvec(vp, len+1);
 
 vp->len = len;
 
 if (vp->len > 11)
 {
    len = vp->len/8 + 1;
   fread(vp->vector, sizeof(char), len, fp);
   resetvec(vp);
  }
  return(vp->len);
}


int roadzipvec(BITVEC *vp, char *buf,int buflen)
{
    int  len;

 len = buflen*10;
 initvec(vp, len+1);
 
 vp->len = len;
 
 //if (vp->len > 11)
 //{
      //len = vp->len/8 + 1;
      memcpy(vp->vector,buf,buflen);
   //fread(vp->vector, sizeof(char), len, fp);
   resetvec(vp);
//  }
  return(vp->len);
}


int putunary(BITVEC *vp, int n)
{
 int ret;
 
  ret = putnbits(vp, 0, n);
  /*ret += putbit(vp, 1);*/
  ret += putnbits(vp, 1, 1);
 
 return(ret);
}

int getunary(BITVEC *vp)
{
 int ret = 0;
 
 for(;getbit(vp) != 1;ret++);
 
 return(ret);
}

void initunarylookup(char *table)
{
 /* Must be passed a char array of size [255].
   This is initialised with the appropriate values for
   the getunary2 lookup */
 int d;
 
 for (d = 255; d >= 0; d--)
 {
  if (d > 127)
   table[d] = 0;
  else if (d > 63)
   table[d] = 1;
  else if (d > 31)
   table[d] = 2;
  else if (d > 15)
   table[d] = 3;
  else if (d > 7)
   table[d] = 4;
  else if (d > 3)
   table[d] = 5;
  else if (d > 1)
   table[d] = 6;
  else if (d > 0)
   table[d] = 7;
  else
   table[d] = -1;
 }
}

int getunary2(BITVEC *vp, char *table)
{
 /* Must use initunarylookup before calling this! */
 
 int temp, ret = 0;
 unsigned char b;
 while ((b = vp->cur << vp->bit) == 0)
 { /* while (the relevant bits in this byte are all zero) */
  ret += 8 - vp->bit;  /* Increase ret by the number of zero bits in this byte */
  vp->pos++; /* Update bitvec positions to point at start of next byte */
  vp->bit = 0;
  vp->cur = vp->vector[vp->pos];
 }
 temp = table[b]; /* look up the unary number in this byte */
 vp->bit += temp+1; /* update bitvec position */
 return (ret + temp);
}

/* Add the number n to the vector, assuming run-length coding */
int putrunlen(BITVEC *vp, int n)
{
  int  ret;

  if( n <= vp->last )
 return(0);  /* hack: don't add the same code twice */

  ret = putdelta(vp, n - vp->last);
  vp->last = n;
  return(ret);
}

/* Get a number from the vector, assuming run length coding */
int getrunlen(BITVEC *vp)
{
  int ret;

  ret = getdelta(vp);
  if( ret > 0 )
  {
 vp->last += ret;
 return(vp->last);
  }
  else
 return(-1);
}

/* Standard delta (Elias) */
int putdelta(BITVEC *vp, int n)
{
 int  mag;
 int  ret;
// if(n < 1 || n > 33554430) {
//  fprintf(stderr, "putdelta: Error, range for delta coding exceeded (%i)\n", n);
//  return -1;
// }

 mag = getlog2((unsigned int) n);
 
 ret = putgamma(vp, mag+1);
 ret += putnbits(vp, n, mag); /* don't output leftmost (ie, top) bit */
   
 return(ret);
}

/* Returns -1 on eof */
int getdelta(BITVEC *vp)
{
  int  mag, val;

  mag = getgamma(vp) - 1;

  if( mag < 0 ) return(-1);
 
  val = getnbits(vp, mag);
 
  if( val < 0 ) return(-1);

  return( ( 1 << mag ) | val );
}


int scandelta(BITVEC *vp)
{
  int  mag;

  mag = getgamma(vp) - 1;

 /* Got the length, now just scan over the rest... */
 
 while (mag > (7 - vp->bit))
 {
  vp->pos++;
  mag -= 8;
 }
 
 vp->bit += mag;
 vp->cur = vp->vector[vp->pos];
 
  return( 0 );
}


/* Standard gamma (Elias) */
int putgamma(BITVEC *vp, int n)
{
  int mag;
  int ret;
 
 if(n < 1 || n > 33554430)
 {
//  fprintf(stderr, "putgamma: Error, range for gamma coding exceeded (%i)!!!\n", n);
  return -1;
 }

  mag = getlog2((unsigned int) n);
 
  ret = putnbits(vp, 0, mag);
  ret += putnbits(vp, n, mag+1);

  return(ret);
}

/* Returns -1 on eof */
int getgamma(BITVEC *vp)
{
  int  b, mag, val;

 for( mag=0 ; ( b = getbit(vp) ) == 0 ; mag++ );

    if( b < 0 ) return(-1);

    val = getnbits(vp, mag);

    if( val < 0 ) return(-1);

    return( ( 1 << mag ) | val );
}


/* Puts integer n into vector using variable-byte coding
 * Coding scheme: use first bit of a byte to indicate if more
 * bytes follow (1) or not (0). Use other 7 bits of each byte
 * to store the integer. E.g.
 * 154 = 10011010 in binary
 *     = 1 0011010  0 0000001 in variable-byte
 * where bits 1 and 9 are 'indicator' bits,     
 * bits 2-8 represent the 7 least-significant bits of the integer, and
 * bits 10-16 represent the 7 most-significant bits of the integer
 */     
int OLDputvbyte(BITVEC *vp, int n)
{
 int ret = 0;

 if (n < 0)
 {
  printf("Error: putvbyte tried to put value %i", n);
  exit(1);
 } 

 while (n >= 128)
 {
  /*ret += putbit(vp, 0x1);*/
  ret += putnbits(vp, 0x1, 1);
  ret += putnbits(vp, n&0xff, 7);
  n >>= 7;
 }
 /*ret += putbit(vp, 0x0);*/
 ret += putnbits(vp, 0x0, 1);
 ret += putnbits(vp, n&0xff, 7);
 
 return ret;
}

/* Offsets are on byte boundary, so can get value directly from
vector, which is faster than via putnbits as used by OLDputvbyte. */ 
int putvbyte(BITVEC *vp, unsigned long n)
{
 int ret = 0;


// if (n < 0)
// {
//  printf("Error: putvbyte tried to put value %i", n);
//  exit(1);
// } 

 while (n >= 128)
 {
  if( vp->pos >= vp->size )
   expandvec(vp);
  
  vp->vector[vp->pos++] =(unsigned char) (n & 0x7f) | 0x80;
  n >>= 7;
  ret += 8;
 }
 if( vp->pos >= vp->size )
  expandvec(vp);
 
 vp->vector[vp->pos++] =(unsigned char) (n & 0x7f) & 0x7f;
 ret += 8;
 
 return ret;
}

int getvbyte(BITVEC *vp)
{
 int ret = 0x0, count = 0, get = 0;
 
 while(((get = getnbits(vp, 8)) & 0x80) == 0x80)
 {
/* For each time we get a group of 7 bits, need to left-shift the latest group by
   an additional 7 places, since the groups were effectively stored in reverse order.*/  
  ret |= ((get & 0x7f) << count);
  count += 7;
 }
/* Now get the final 7 bits, which may again need to be left-shifted by a factor of 7. */
 return (ret |= (get & 0x7f) << count);
}

/* getvbyte3: offsets are on byte boundary so can get value directly from
vector, which is faster than via getnbits as used by getvbyte.
This version uses shifting instead of masking as in getvbyte2*/ 
int getvbyte3(BITVEC *vp)
{
    int ret = 0x0, count = 0;
    unsigned char get = 0, temp = 0;

    get = vp->vector[vp->pos++];
    temp = get << 1;
    temp = temp >> 1; /* clear MSB */

    while(get != temp) /* while top bit is a 1 */
    {
      ret |= temp << count;
      count += 7;
      get = vp->vector[vp->pos++];
      temp = get << 1;
      temp = temp >> 1; /* clear MSB */
    }

#ifdef DEBUG_GETVBYTE 
 ret |= temp << count;

   printf("Offset: %i\n",ret);
   if (ret < 0)
   {
    printf("Error: getvbyte() retrieved value %i", ret);
    exit(1);
   }
#endif
       return ret;

/* getvbyte2: offsets are on byte boundary so can get value directly from
vector, which is faster than via getnbits as used by getvbyte */ 
/* This is the fastest */
int getvbyte2(BITVEC *vp)
{
        int ret = 0x0, count = 0, get = 0;
 
        while( ((get = vp->vector[vp->pos++]) & 0x80) == 0x80 )
        {
/* For each time we get a group of 7 bits, need to left-shift the latest group by
   an additional 7 places, since the groups were effectively stored in reverse order.*/
                ret |= ((get & 0x7f) << count);
                count += 7;
        }
/* Now get the final 7 bits, which need to be left-shifted by a factor of 7. */
    ret |= (get & 0x7f) << count;
    /* vp->cur = vp->vector[vp->pos]; */
#undef DEBUG_GETVBYTE
#ifdef DEBUG_GETVBYTE
   printf("Offset: %i\n",ret);
   if (ret < 0)
   {
    printf("Error: getvbyte() retrieved value %i", ret);
    exit(1);
   }
#endif
       return ret;

void scanvbyte(BITVEC *vp)
{
/* Use this instead of getvbyte when don't care abour the offset value, but just
want to progress along the vector */
  while(((vp->cur >> (7 - vp->bit) ) & 0x1) == 0x1)
  { /* while (the sign-bit in the bitvector is 1) */
   vp->pos++; /* jump forward 1 byte in vp */
   vp->cur = vp->vector[vp->pos];   
  }
  /* next bit must be a 0 sign-bit, so jump forward another byte to the end of this v-byte */
  vp->pos++;
  vp->cur = vp->vector[vp->pos];   
}

/* scanvbyte2: offsets are on byte boundary */
void scanvbyte2(BITVEC *vp)
{
 
 while( (vp->vector[vp->pos++] & 0x80) == 0x80 )
 { /* while (the sign-bit in the bitvector is 1) */
  /* do nothing */
 }
 vp->cur = vp->vector[vp->pos];
}

void OLDscanvbyte(BITVEC *vp)
{
 int get = 0;
 
 while(((get = getnbits(vp, 8)) & 0x80) == 0x80)
 {
  ; /* Do nothing! */
 }
}

int AlignVbyteWrite(BITVEC *vp2)
{
 if (vp2->bit != 0) /* start offsets on byte boundary */
 {
   putnbits(vp2,0,8 - vp2->bit);
 } 
    return 0;
}

int AlignVbyteRead(BITVEC *vp)
{
 if (vp->bit != 0) /* freq is on byte boundary */
 {
  vp->bit = 0;
  vp->pos++;
  vp->cur = vp->vector[vp->pos];
 }
    return 0;
}

int OLDgetvbyte(BITVEC *vp)
{ /*no skipping */
  int ret = 0x0, count = 0;

 printf("NOT skip\n");
 fflush(NULL); 

  while(getbit(vp) == 0x1)
  {
/* For each time we get a group of 7 bits, need to left-shift the
  latest group by an additional 7 places, since the groups were effectively
  stored in reverse order.*/  
   ret |= ((getnbits(vp, 7) & 0x7f) << count);
   count += 7;
  }
 
/* Now get the final 7 bits, which may again need to be left-shifted by
  a factor of 7. */
  ret |= (getnbits(vp, 7) & 0x7f) << count;

  if (ret < 0)
  {
   printf("Error: getvbyte() retrieved value %i", ret);
   exit(1);
  } 
  
  return (ret);
}


/* -------------- Golomb Coding ---------------- */

/**Constant to signal unitnitialized Hash Table value**/
#define GOLHASH_UNINIT -1

/** This will store the maximum b_offset encountered durring
    index-build in putgolomb **/
static int iMaxBOffset = 0;

/* Standard Golomb */
/* x = value to be put in */
/* b = coding parameter */
int putgolomb(BITVEC *vp, int x, int b)
{
 int mag, ret, rem, q, d;

#ifdef DEBUG_GOLOMB
 if(x<1)
 {
  printf("vec::putgolomb Coding Error with %i\n", x);
  return -1;
 }
#endif

 if(b > iMaxBOffset) iMaxBOffset = b;

 /* work out floor of (x - 1)/b */
 mag = (int) floor(((double)(x - 1))/((double)b));  
 
#ifdef DEBUGGOLOMB
 printf("-----------\nstoring: %d with b: %d, mag: %d\n", x,b,mag);
#endif 

 /* Store mag+1 in unary */
 /* Think about it: mag=3 means storing 001, hence this DOES work */
 ret = putnbits(vp, 0, mag);

 /*ret += putbit(vp, 1);*/
 ret += putnbits(vp, 1, 1);
 
#ifdef DEBUGGOLOMB
 printf("stored mag of: %d\n", mag+1);
#endif

 /* What's the remainder? ie. x - (mag * b) */
 rem = x - (mag * b) - 1;

#ifdef DEBUGGOLOMB
 printf("rem: %d\n", rem);
#endif 
 
 /* Now work out the point at which coding requires floor(log(b))+1 bits */
 /* There are b remainders, so 2^k < b <= 2^(k+1) */
 /* Therefore, 2^k <= b and then k = log_2 b, so first k remainders */
 /* can be represented in floor(log_2 b) bits [phew] */
 
 q = getlog2((unsigned int) b);

 /* Now, work out the base value to use when we hit the toggle point (q)   */
 /* Since there are q+1 bits (max) used to represent a remainder, there    */
 /* can be 2^(q+1) such values using q+1 bits. Since there are b values    */
 /* we want to represent, if the remainder is less than 2^(q+1) - b, in other  */
 /* words if there are "spares", we can use q bits [phew again!].     */ 
 /* Geez, Golomb coding isn't fun.            */
 /* So, d = 2^(q+1) - b  (jz/sara figured this, thanks)      */

 d = (1 << (q+1)) - b;

#ifdef DEBUGGOLOMB
 printf("toggle point q: %d with a d of: %d\n", q, d);
#endif 

 /* Can this remainder be represented in floor(log_2 b) bits? */
 if (rem < d)
  /* Store the value of remainder directly */
  ret += putnbits(vp, rem, q); 
 else
 {

#ifdef DEBUGGOLOMB
  printf("actually storing: %d in toggle+1 (%d) bits\n", d+rem, q+1);
#endif 

  /* Store the value of (d + remainder) in floor(log_2 b) + 1 bits */
  ret += putnbits(vp, d+rem, q+1);
 }
 
#ifdef DEBUGGOLOMB
 printf("stored: %d bits\n-----------\n", ret);
#endif 

 return(ret); 
}

/* Constant to signal unitnitialized Hash Table value */
#define GOLHASH_UNINIT -1

/*  If hashing is to be used, call initgolhash before using getgolomb() */
int initgolhash(GOLHASH_ROOT *rpGolHashRoot, int iSize)
{
 int x;

 rpGolHashRoot->iSize         = iSize;
 rpGolHashRoot->ipGolHashTable = (int *)malloc(sizeof(int) * (iSize + 1));

 if(rpGolHashRoot->ipGolHashTable != NULL) /*Initialise hash table*/
 {
  /* Table initialised to 'empty', values are calculated the first time that we
  need them and then stored */
  for (x=0;x<=iSize;x++)
   rpGolHashRoot->ipGolHashTable[x] = GOLHASH_UNINIT;

  /* Or, use this to calculate all values at the beginning
  rpGolHashRoot->ipGolHashTable[x] = (int) floor(L2 ((double) x));
  */
 }
 else
 {
  printf("Can not allocate Golomb Hash Table (size %i)\n", sizeof(int) * (iSize + 1));
  rpGolHashRoot->iSize = 0;
  return false;
 }

 return true;
}

/* Standard Golomb -> b = coding parameter must be >= 1 */

/* If several Golomb codes are to be decompressed that share the same parameter b, more efficient to make use
   of getgolomb1 and getgolomb2 below */

int getgolomb(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot)
{
   int mag;
 int q, d,i;
 int rem;

 /* Get mag+1 in unary */
 /* By setting mag=0, we actually get mag [rather than mag+1, which */
 /* is what was stored] [phew!] */
 for( mag=0 ; (i = getbit(vp)) == 0 ; mag++ );

 if(i<0) return -1;

 if(pGolHashRoot != NULL)
  if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/
  {
   printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);
   fflush(NULL);
   return -3;
  }

 q = GOLHASH_UNINIT;
 
 /* Try and look up the log_2(b)... if not there, calculate and store */
 if(pGolHashRoot != NULL)
  q = pGolHashRoot->ipGolHashTable[b];
 
 if(q == GOLHASH_UNINIT)
 {
  /* Now work out the toggle point for remainders as before */
  q = getlog2((unsigned int) b);
  if(pGolHashRoot != NULL)
   pGolHashRoot->ipGolHashTable[b] = q;
 }

 /* Get the first q bits (we may need (q+1) actually) */
 if((rem = getnbits(vp, q)) < 0)
  return -1;

 /* Now, work out the base value to use for the toggle point (q) */
 /* It's d=2^(q+1) - b           (jz/sara figured this, thanks) */
 /* See my notes in putgolomb() */
 d = (1 << (q+1)) - b;

 /* Do we need to get an extra bit? */
 if (rem >= d)
 {
  /* If so:
     (1) shift rem left and OR with the bit we get
        (2) subtract d to get remainder */
  rem = ((rem << 1) | getbit(vp)) - d;
 }
 
 /* The value is the mag * b + remainder + 1 */
 return(mag * b + rem + 1); 
}


/* Standard Golomb -> b = coding parameter must be >= 1 */
int getgolomb1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, GOLOMBPARAMS *pgPar)
{
 int b, q;

 /* Compute b value */
 if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

 pgPar->b = b;
 
#if defined(CACHE_IN_ADVANCE) || defined(CACHE_IF_NEEDED)
 if(pGolHashRoot != NULL)
 {
  if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/
  {
   printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);
   fflush(NULL);
   return false;
  }
  
  q = pGolHashRoot->ipGolHashTable[b];

  if(q == GOLHASH_UNINIT)
  {
   /* Now work out the toggle point for remainders as before */
   q = getlog2((unsigned int) b);
   if(pGolHashRoot != NULL) pGolHashRoot->ipGolHashTable[b] = q;
  }
 }
 else /* No caching table */
#endif
  q = getlog2((unsigned int) b);

 pgPar->q = q; /* Copy parameter to be returned */

 /* Now, work out the base value to use for the toggle point (q) */
 /* It's d=2^(q+1) - b           (jz/sara figured this, thanks) */
 /* See my notes in putgolomb() */
 pgPar->d = (1 << (q+1)) - b;

 return true;
}

/* Standard Golomb -> b = coding parameter must be >= 1 */
int getgolomb2(BITVEC *vp, GOLOMBPARAMS *gPar)
{
  int  mag, i, rem;
 /* Get mag+1 in unary */
 /* By setting mag=0, we actually get mag [rather than mag+1, which */
 /* is what was stored] [phew!] */
 for( mag=0 ; (i = getbit(vp)) == 0 ; mag++ );
/* mag = getunary2(vp, UNARYTABLE); */

#ifdef DEBUG_GOLOMB
 if(i<0) return -1;
#endif

 /* Get the first q bits (we may need (q+1) actually) */
 if((rem = getnbits(vp, gPar->q)) < 0) return -1;

 /* Do we need to get an extra bit? */
 if (rem >= gPar->d)
 {
  /* If so:
     (1) shift rem left and OR with the bit we get
        (2) subtract d to get remainder */
  rem = ((rem << 1) | getbit(vp)) - gPar->d;
 }
 
 /* The value is the mag * b + remainder + 1 */
 return(mag * gPar->b + rem + 1); 
}


/* --------------------------- RICE CODING ------------------------- */
/* Standard Rice encoding.
 
    Slightly inefficient: logb should be computed externally and passed in
    at each call, to avoid repetition.
*/
int
putrice(BITVEC *vp, int x, int b)
{
    int mag, ret, rem, logb;

  /* Record largest value of b, used to define hash-table size for getrice. */ 
   if(b > iMaxBOffset) iMaxBOffset = b;

    /* work out floor of (x - 1)/b */
   /* should be passed in as parameter */
    logb = getlog2((unsigned int) b);
  
  /* Ensure that b is a non-negative power of 2 (required for Rice coding) */
   b = 1 << logb;
    mag = ( x - 1 ) >> logb;
#ifdef DEBUGRICE
    printf("-----------\nstoring: %d with b: %d, mag: %d\n", x,b,mag);
#endif
 
    ret = putnbits(vp, 0, mag);
    /*ret += putbit(vp, 1);*/
   ret += putnbits(vp, 1, 1);
 
        /* What's the remainder? ie. x - (mag * b) */
    rem = x - (mag * b) - 1;
#ifdef DEBUGRICE
    printf("rem: %d\n", rem);
#endif
 
    ret += putnbits(vp, rem, logb);
#ifdef DEBUGRICE
    printf("stored: %d bits\n-----------\n", ret);
#endif
    return(ret);
}


/* Standard Rice decoding.
    NOTE.  b must be a non-negative power of 2 or this will fail.
 
    Slightly inefficient: logb should be computed externally and passed in
    at each call, to avoid repetition.
*/
int
getrice(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot)
{
    int         mag;
    int         logb;
    int         rem;
    int         ret;
 
    for( mag=0 ; getbit(vp) == 0 ; mag++ )
        ;
 
  if (pGolHashRoot != NULL)
  {
   if (b < 0 || b > pGolHashRoot->iSize)
   {
    printf("Warning (getrice): b value %i is outside of TABLESIZE %i\n", b, pGolHashRoot->iSize);
    fflush(NULL);
    return -3;
   }
   else
   { /* Try to look up log_2(b)... */
    logb = pGolHashRoot->ipGolHashTable[b];
    if (logb == GOLHASH_UNINIT)
    { /* it's not in the table yet, so calculate it and then add it to the table */
     logb = getlog2((unsigned int) b);
     pGolHashRoot->ipGolHashTable[b] = logb;
     }
    b = 1 << logb;
   }  
  }
  else
  { /* This is only used for the debugging function in merge.c, where it is possible
     that CACHE is defined, but pGolHashRoot is still NULL...
   In this case, just calculate logb each time. */
   logb = getlog2((unsigned int) b);
  }

    rem = getnbits(vp, logb);
 
    /* The value is the mag * b + remainder + 1 */
    ret =  mag * b + rem + 1;
 
    return(ret);
}

/* Standard Rice decoding */
/* getrice1 is used to compute the coding parameters b and logb (they are looked up
   in a table for efficiency) and stored in prPar */
int old_getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, RICEPARAMS *prPar)
{
 int b, logb;

 /** Compute b Value for Rice coding parameters **/
 if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

 if(pGolHashRoot != NULL)
 {
  if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/
  {
   printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);
   fflush(NULL);
   return false;
  }
  
  logb = pGolHashRoot->ipGolHashTable[b];

  if(logb == GOLHASH_UNINIT) /* Not in table, so calculate and store */
  {
   logb = (int) floor(L2 ((double) b));
   if(pGolHashRoot != NULL) pGolHashRoot->ipGolHashTable[b] = logb;
  }
 }
 else /** No caching table, so calculate each time **/
  logb = (int) floor(L2 ((double) b));

 prPar->logb = logb; /** Copy parameter to be returned **/

 /* Ensure that b is a non-negative power of 2, else Rice coding will fail */
 prPar->b = 1 << logb;
 
 return true;
}

/* Standard Rice decoding */
/* getrice1 is used to compute the coding parameters b and logb */
int getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, RICEPARAMS *prPar)
{
 unsigned int b, logb;

 /** Compute b Value for Rice coding parameters **/
 if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

 logb = getlog2((unsigned int) b);

 prPar->logb = logb; /** Copy parameter to be returned **/

 /* Ensure that b is a non-negative power of 2, else Rice coding will fail */
 prPar->b = 1 << logb;
 
 return true;
}

/* Decode the actual Rice value */ 
int getrice2(BITVEC *vp, RICEPARAMS *rPar)
{
  int  mag, rem;
 
 /* Get unary part of code */
 for( mag=0 ; getbit(vp) == 0 ; mag++ )
  ;
/* mag = getunary2(vp, UNARYTABLE); */
 rem = getnbits(vp, rPar->logb);
  
 /* The values is the mag * b + remainder + 1 */
 
 return (mag * rPar->b + rem + 1);
 
}

/* Used where there are Rice codes in a sequential bitvector and these can be
   passed over without concern for their values */
int scanrice2(BITVEC *vp, RICEPARAMS *rPar)
{
  int mag, n = rPar->logb;

 for( mag=0 ; getbit(vp) == 0 ; mag++ )
  ;
/* getunary2(vp, UNARYTABLE); */

 /* Got the unary, now just scan over the rest... */
 
 while (n > (7 - vp->bit))
 {
  vp->pos++;
  n -= 8;
 }
 
 vp->bit += n;
 vp->cur = vp->vector[vp->pos];
 
 return 0;
}

#undef DEGUG_PUTNBITS

#ifdef NEW_PUTNBITS
/* New putnbits() puts multiple bits at a time, instead of multiple calls to putbit() for each bit */
int putnbits(BITVEC *vp, int number, int bits)
{
 unsigned char temp = 0;
 unsigned int n;
 int b, shift;
 b = bits;
 n = number;
#ifdef DEGUG_PUTNBITS 
// printf("n: %i, bits: %i\n", n, bits);
 if(bits > int_size)
  printf("putnbits(): number of bits %i > int_size %i\n", bits, int_size);
#endif
  /* This is needed because sometimes we are sent numbers that will not fit in the bits specified */
  /* e.g. number = 11 (= 1011) and bits = 3 */
  /* so need to clear upper bits not being put */
  n = n << (int_size - bits);
  n = n >> (int_size - bits);
 
 for(shift = 8-vp->bit ; bits >= shift ; bits -= shift, shift = 8-vp->bit)
 {
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);
#endif

  if( vp->pos >= vp->size )
   expandvec(vp);
  vp->cur = vp->cur | (n >> (bits - shift));
  vp->vector[vp->pos] = vp->cur;
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->vector[%i] = %X\n", vp->pos, vp->bit, vp->pos, vp->vector[vp->pos]);
#endif      
  n = n << (int_size - (bits - shift)); /* clear bits copied to vector */
  n = n >> (int_size - (bits - shift));

  vp->bit = 0;
  vp->pos++; /* move to next byte */
  
  vp->cur = 0;
  /*vp->vector[vp->pos] = 0;*/
 }

 /* Put remaining bits */
 if(bits > 0)
 {
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);
#endif  

  vp->cur = vp->cur | (n << (shift - bits));
  /*vp->vector[vp->pos] = vp->cur;*/
  vp->bit += bits;
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->vector[%i] = %X\n", vp->pos, vp->bit, vp->pos, vp->vector[vp->pos]);
#endif  
 } 
 return(b);
}

#else /* Old putnbits() */
/* Put n bits in vector, starting with nth from right, going to rightmost */
/* This is not efficient.  Should be recoded to insert several bits
   simultaneously ... but not really worth the effort ... */
int putnbits(BITVEC *vp, int n, int num)
{
 int i,ret;
#ifdef DEGUG_PUTNBITS
 printf("n: %i, num: %i\n", n, num);
 if(num > int_size)
  printf("putnbits(): number of bits %i > int_size %i\n", num, int_size);
  printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);  
#endif
 for( ret=0, i=num-1 ; i>=0 ; i-- )
  ret += putbit(vp, ( n >> i ) & 0x1 );
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);
#endif
 return(ret);
}
#endif /* NEW_PUTNBITS */


static int masks[9] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };
/* New version of getnbits that does not need to call getbit */
int getnbits(BITVEC *vp, int num)
{
    int  mask, shift, val;
  unsigned char  temp;

    val = 0;
  
    for( shift = 8-vp->bit ; num >= shift ; num -= shift, shift = 8-vp->bit )
  /* copy out whole of cur */
    {
   if( 8*vp->pos + vp->bit >= vp->len)
     return(-1);
   mask = masks[shift];
   val = ( val << shift ) | ( vp->cur & mask );
   vp->bit = 0;
   vp->pos++;
   vp->cur = vp->vector[vp->pos];
    }

 /* Get any remaining bits */
 if ( num > 0 )
 {
      temp = vp->cur;
  temp = temp << vp->bit;
  temp = temp >> ( 8 -  num );
  val = ( val << num ) | temp;
  vp->bit += num;
 } 
   
#ifdef VECDEBUG
    if( val < 0 )
  printf("Panic in getnbits: overflow\n");
#endif /* VECDEBUG */

    return( ( val<0 ) ? -1 : val );
}

/* Get n bits from vector */
/* Rather than calling getbit for each bit, shifts several bits at
   once if this would empty the current byte; only makes it one or
   two percent faster. */
int OLDgetnbits(BITVEC *vp, int num)
{
    int  mask, shift, val, b;

    b = val = 0;

#if true
    for( shift = 8-vp->bit ; num >= shift ; num -= shift, shift = 8-vp->bit )
  /* copy out whole of cur */
    {
   if( 8*vp->pos + vp->bit >= vp->len)
     return(-1);
   mask = masks[shift];
   val = ( val << shift ) | ( vp->cur & mask );
   vp->bit = 0;
   vp->pos++;
   vp->cur = vp->vector[vp->pos];
    }
#endif /* true */

 /* Get any remaining bits */
    for( ; num>0 && ( b = getbit(vp) ) >= 0 ; num-- )
  val = ( val << 1 ) | ( b & 0x1 );
#ifdef VECDEBUG
    if( val < 0 )
 printf("Panic in getnbits: overflow\n");
    if( b < 0 && vp->pos < vp->len/8 )
 printf("Early termination in getnbits.\n");
#endif /* VECDEBUG */
    return( ( b<0 ) ? -1 : val );
}

#ifdef NEW_PUTNBITS
int putbit(BITVEC *vp, int b)
{
 unsigned char temp;
 
 temp = b & 0x1;
 temp = temp << (8 - vp->bit - 1);
 vp->cur = vp->cur | temp;
 vp->bit++;

 if( vp->bit == 8 ) /* go to next byte */
 {
  if( vp->pos >= vp->size )
   expandvec(vp);

  if (disaster == false)
  {
   vp->vector[vp->pos] = vp->cur;
   vp->cur = 0;
   vp->pos++;
   vp->bit = 0;
   /*vp->vector[vp->pos] = 0;*/
  }
 } 
    return(1);
}

#else /* Old putnbits() */
/* Put a bit into vector; bytes are filled left to right */
/* Because of the way bytes are filled, vectors can't be accessed until
   padvec has been called. */
int putbit(BITVEC *vp, int b)
{
  vp->cur = ( vp->cur << 1 ) | ( b & 0x1 );
  vp->bit++;

  if( vp->bit == 8 ) /* go to next byte */
  {
#ifdef DEGUG_PUTNBITS 
  printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);
#endif  
  if( vp->pos >= vp->size )
   expandvec(vp);

  if (disaster == false)
  {
   vp->vector[vp->pos] = vp->cur & 0xff;
   vp->cur = 0;
   vp->pos++;
   vp->bit = 0;
    }
 }
    return(1);
}
#endif /* NEW_PUTNBITS */

/* Get a bit from a vector */
int getbit(BITVEC *vp)
{
    int  b;

#ifdef DEBUGBIT
 printf("yep\n");
#endif

    if( 8*vp->pos + vp->bit >= vp->len )
  return(-1);

    b = ( vp->cur >> ( 7 - vp->bit ) ) & 0x1;
    vp->bit++;
   
    if( vp->bit == 8 )
    {
  vp->pos++;
  vp->bit = 0;
  vp->cur = vp->vector[vp->pos];
    }
    return(b);
}

#undef DEBUG_PADVEC

#ifdef NEW_PUTNBITS
/* Terminate a vector */
void padvec(BITVEC *vp)
{
  if( vp->pos >= vp->size ) /* this is a bit yucky */
  expandvec(vp);

 if (disaster == false)
 {
  vp->vector[vp->pos] = vp->cur;
  vp->len = 8*vp->pos + vp->bit;
#ifdef DEBUG_PADVEC 
  printf(" vp->vector[%i] = %X\n", vp->pos, vp->vector[vp->pos]);
   printf(" vp->len = %i\n", vp->len);
#endif
 }
}
#else /* Old putnbits() */
/* Terminate a vector */
void padvec(BITVEC *vp)
{
  if( vp->pos >= vp->size ) /* this is a bit yucky */
  expandvec(vp);

 if (disaster == false)
 {
    vp->vector[vp->pos] = ( vp->cur << ( 8-vp->bit ) ) & 0xff;
    vp->len = 8*vp->pos + vp->bit;
#ifdef DEBUG_PADVEC 
  printf(" vp->vector[%i] = %X\n", vp->pos, vp->vector[vp->pos]);
  printf(" vp->len = %i\n", vp->len);
#endif
 }
}
#endif /* NEW_PUTNBITS */

/* Enlarge vector */
void expandvec(BITVEC *vp)
{
  char *newvec;
  int  i;
   
 /* should use realloc here */
  if ((newvec = malloc( vp->size*2 * sizeof(char) )) == NULL)
  {
  fprintf(stderr, "Error in expandvec malloc. Tried to allocate: %d bytes\n", vp->size*2 * sizeof(char));
  disaster = true;
  } 
 else
 {
  for( i=0 ; i<vp->size ; i++ )
   newvec[i] = vp->vector[i];

  vp->size = vp->size*2;
  free(vp->vector);
  vp->vector = newvec;
 }
}

/** Copy one vector to another for debugging purposes**/
BITVEC *VecCpy(BITVEC *VecDest, BITVEC *VecSrc)
{
 if(VecDest->vector != NULL) free(VecDest->vector);

 VecDest->size = VecSrc->size;  /* Of vector, in bytes */
 VecDest->pos  = VecSrc->pos;   /* Current byte number */
 VecDest->bit  = VecSrc->bit;   /* Current bit in byte */
 VecDest->cur  = VecSrc->cur;   /* Temporary space for putting, getting bits */
 VecDest->len  = VecSrc->len;   /* Number of bits used */
 VecDest->last = VecSrc->last;  /* Total of runlengths seen so far */

  if((VecDest->vector = malloc(VecSrc->size * sizeof(char)))!=NULL)
  memcpy(VecDest->vector, VecSrc->vector, VecSrc->size * sizeof(char));
 else
 {
  VecDest->size =     /* Of vector, in bytes */
  VecDest->pos  =     /* Current byte number */
  VecDest->bit  =     /* Current bit in byte */
  VecDest->cur  =     /* Temporary space for putting, getting bits */
  VecDest->len  =     /* Number of bits used */
  VecDest->last = 0;  /* Total of runlengths seen so far */
 }
 
 return(VecDest);
}

 

 

/******* Cache Compression Functions ***********************************/

#ifdef SUPPORT_FILEBUF_H_CACHING

#include "FileBuf.h"

/*** Variable byte functions with caching ***/
/* Hugh Williams, August 1995 */
/* Dirk (Reviewed March 2001)*/

/*
#define NORMAL_INTS TRUE

   Uncomment this if you want integers to be stored uncompressed
  Otherwise, all values will be stored w/ variable bit length compression
*/

/* Variable byte length reads and writes of integers */

/**************
  Read integer value as normal Integer
  or as an array of four bytes (packed representation)
**************/
int cVbyteRead(FB_ROOT *fp)
{
  char tmp = 0x1;
  int val = 0;
 
#ifdef NORMAL_INTS
  /* Integers are stored in uncommpressed format */

  if(cRead(&val, sizeof(int), 1, fp) == 0)
  {
  if(cEof(fp)) return(-1);
  else
   return(0); 
  }  
  
#else
  /* Integers are stored in compressed format */

  while((tmp & 0x1) == 0x1)
  {
    if(cRead(&tmp, sizeof(char), 1, fp) == 0)
    {
      if(cEof(fp)) return(-1);
      else
        return(0); 
    }     

    val = (val << 7) + ((tmp >> 1) & 127);
  }
#endif

  return(val);
}


/**************
  Write integer value as normal Integer
  or as an array of four bytes (packed representation)
**************/
int cVbyteWrite(int number, FB_ROOT *fp /*, char *pcCallerID*/)
{
  char bytearray[5];   /**DB: We need five byte to cover the full range of int**/
  int x, started = false;
  int charswritten = 0;
 
#ifdef NORMAL_INTS

  cWrite(&number, sizeof(int), 1, fp);

#else

  /* Compress Integers in 7-Bit format
     and use the lowest bit to signal
     whether more bytes follow (1) in the
     Bitstream or not (0)
  Dirk: We assume that number are not greater than 28 bit

 Use this when uncertain:

 if(number < 0 || number > 268435455)
  printf("Vbyte Panic %i %s\n",number, pcCallerID);
*/
  for(x=0;x<5;x++)
  {
 /** select seven bits and transfer them one bit left **/
 /** store the selected seven bits in the byte array  **/
    bytearray[x] = (number%128) << 1;
 /** get next seven bits through masking the last seven **/
    number = number / 128;
  }

  /**Write data to disk**/
  for(x=4;x>0;x--)
  {
    if (bytearray[x] != 0 || started == true)
    {
      started = true;
      bytearray[x] |= 0x1; 
      cWrite(&bytearray[x], sizeof(char), 1, fp);
      charswritten++;
    }
  } 

  cWrite(&bytearray[0], sizeof(char), 1, fp);
  charswritten++;

#endif
 
  return(charswritten);
}

/* Read a vector back from file */
/* Returns -1 on error */
int cVbyteReadVec(BITVEC *vp, FB_ROOT *fp)
{
  int len;

  if ((vp->len = cVbyteRead(fp)) == -1)
  {
    fprintf(stderr, "cVbyteReadVec: Error reading vbyte coded vector length %d\n", vp->len);
  return(-1);
  } 
  vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */
  len = vp->len/8 + 1;
  vp->vector = malloc(vp->size * sizeof(char));
  if (cRead(vp->vector, sizeof(char), len, fp) <= 0)
  {
    fprintf(stderr, "cVbyteReadVec: Error reading vector (length: %d)\n", len);
  return(-1);
 }   

  resetvec(vp);
  return(len);
}

/* Write the contents of a vector to file.  Assumes that padvec has
   been called. */
void cDumpVec(BITVEC *vp, FB_ROOT *fp)
{
  cWrite(&vp->len, sizeof(int), 1, fp);
  cWrite(vp->vector, sizeof(char), vp->len/8 + 1, fp);
}

/* Read a vector back from file */
/* Returns -1 on error */
int cReadVec(BITVEC *vp, FB_ROOT *fp)
{
 int len;

 cRead(&vp->len, sizeof(int), 1, fp);
 vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */
 len = vp->len/8 + 1;
 vp->vector = malloc(vp->size * sizeof(char));

 if(vp->vector != NULL)
 {
  cRead(vp->vector, sizeof(char), len, fp);
  resetvec(vp);
 }
 else
 {
  printf("Can not allocate %i bytes...:(\n", vp->size * sizeof(char));
  return -1;
 }

 return(0);
}

#endif

如上为第一部分,
#include "def.h"

/*  Variable byte length reads and writes of integers */
/*  We use a representation in which seven bits in each byte is used to code an
 integer, with the least significant bit set to 0 if this is the last byte,
 or to 1 if further bytes follow.
 In this way, we represent small integers efficiently; for example, we
 represent 135 in two bytes, since it lies in the range $[2^7\cdots 2^{14})$,
 as 00000011~00001110; this is read as 00000010000111 by removing the least
 significant bit from each byte and concatenating the remaining~14~bits. */

int vbyteread(FILE *fp)
{
 char tmp = 0x1;
 int val = 0;
 
  while((tmp & 0x1) == 0x1)
  {
   if (fread(&tmp, sizeof(char), 1, fp) == 0)
   {
    if (feof(fp))
     return(-1);
    else
     return(0); 
   }     
   val = (val << 7) + ((tmp >> 1) & 127); 
  }
 return(val);
}

int vbytewrite(int number, FILE *fp)
{
 char bytearray[4];
 char tmp = 0;
 int x, started = FALSE;
 int charswritten = 0;
 
  for(x=0;x<4;x++)
  {
   tmp = (number%128) << 1;
   bytearray[x] = tmp;
   number /= 128;
  }     
  for(x=3;x>0;x--)
  {
   if (bytearray[x] != 0 || started == TRUE)    
   {
    started = TRUE;
    bytearray[x] |= 0x1; 
    fwrite(&bytearray[x], sizeof(char), 1, fp);
    charswritten++;
   }
  } 
  bytearray[0] |= 0x0;
  fwrite(&bytearray[0], sizeof(char), 1, fp);
  charswritten++;
 return(charswritten);
}
第二部分

#include <stdio.h>
#include <ctype.h>
#include <math.h>
#include <sys/types.h>
//#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>

/* Miscellaneous Constants */
#define true  1
#define false  0
#define FALSE  0
#define TRUE  1

#define BIGVECLEN 5000000 
#define GOLHASHTABLESIZE 102397

/* Bit vector structure used in standard Golomb, Elias Gamma & Elias Delta */
typedef struct bitvecrec
{
    unsigned char *vector;  /* Sequence of bytes to hold bitvector */
    int  size;     /* Of vector, in bytes */
    int  pos;      /* Current byte number */
    int  bit;      /* Current bit in byte */
    unsigned char cur;      /* Temporary space for putting, getting bits */
    int  len;      /* Number of bits used */
    int  last;     /* Total of runlengths seen so far */
  /* The last field is an oddity; it allows us to have
     several bitvectors open at once */
} BITVEC;

typedef struct golombhashstruct
{
 int   b; /* b value used for lookup */
 int   q;  /* q value that is floor(log_2(b)) */
} GOLHASH;


/* Logarithmic functions:

    LOGn(X) = LOG(X) / LOG(N) <--- LOG Base n computed through LOG Base 10
*/

#define L2(f)  ( log10((f)) / 0.301029995 /*=log10(2.0)*/ )
#define L4(f)  ( log10((f)) / 0.602059999 /*=log10(4.0)*/ )
#define L6(f)  ( log10((f)) / 0.778151250 /*=log10(6.0)*/ )
#define L8(f)  ( log10((f)) / 0.903089990 /*=log10(8.0)*/ )
#define LN(f)  ( log10((f)) / 0.434294480 /*=log10(e)*/ )

void padvec(BITVEC *), expandvec(BITVEC *), initcmp(), initvec(BITVEC *, int),
  freevec(BITVEC *), resetvec(BITVEC *), inittree(), cleanuptree(),
  dumpvec(BITVEC *,FILE *), dumptree(char *);
 
int vbyteread(FILE *);
int vbytewrite(int, FILE *);

static int disaster = false;
static int int_size = sizeof(int)*8;


/* =============== Bitvector Functions ============== */

/* Initialising a vector before insertion of data */
void initvec(BITVEC *vp, int initveclen);

/* Return length of vector */
int veclen(BITVEC *vp);

/* Before a vector can be read, it must be reset */
void resetvec(BITVEC *vp);
void freevec(BITVEC *vp);

/* Write the contents of a vector to file.  This version does not use
   vbtyewrite for the vector length, so if the vector is to be opened
  with loadvector, use vbytedumpvec (below) instead.
   Assumes that padvec has been called. */
void dumpvec(BITVEC *vp, FILE *fp);

/* This is a different version of dumpvec that uses vbyteread to
   get the vector length... this is the operation that is 'undone'
  in vbytereadvec (which is called by loadvector)...
  Assumes that padvec has been called. */
int vbytedumpvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file */
/* Returns -1 on error */
int vbytereadvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file */
/* Returns -1 on error */
int readvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file -- used in cafezipread.c */
/* Returns -1 on error */
int readzipvec(BITVEC *vp, FILE *fp);

int roadzipvec(BITVEC *vp, char *buf,int buflen);/*把buf装入vec中 add by 方旭光2004-10-11*/

/* =============== End Bitvector Functions ============== */

/* =============== Compression Functions ================== */

int putunary(BITVEC *vp, int n);

int getunary(BITVEC *vp);

int getunary2(BITVEC *vp, char *table);

void initunarylookup(char *table);

/* Add the number n to the vector, assuming run-length coding */
int putrunlen(BITVEC *vp, int n);

/* Get a number from the vector, assuming run length coding */
int getrunlen(BITVEC *vp);

/* Put an int in standard delta (Elias) Coding */
int putdelta(BITVEC *vp, int n);

/* Returns -1 on eof */
int getdelta(BITVEC *vp);

/* Returns 0 on eof */
int scandelta(BITVEC *vp);

/* Standard gamma (Elias) */
int putgamma(BITVEC *vp, int n);

/* Returns -1 on eof */
int getgamma(BITVEC *vp);

/* Variable-byte coding */
//int putvbyte(BITVEC *vp, int n);
int putvbyte(BITVEC *vp, unsigned long n);

/* Get variable-byte coded value from vector */
int getvbyte(BITVEC *vp);

void scanvbyte(BITVEC *vp);

/* Put n bits in vector, starting with nth from right, going to rightmost */
/* This is not efficient.  Should be recoded to insert several bits
   simultaneously ... but not really worth the effort ... */
int putnbits(BITVEC *vp, int n, int num);

/* Get n bits from vector */
/* Rather than calling getbit for each bit, shifts several bits at
   once if this would empty the current byte; only makes it one or
   two percent faster. */
int getnbits(BITVEC *vp, int num);

/* Put a bit into vector; bytes are filled left to right */
/* Because of the way bytes are filled, vectors can't be accessed until
   padvec has been called. */
int putbit(BITVEC *vp, int b);

/* Get a bit from a vector */
int getbit(BITVEC *vp);

/* Terminate a vector */
void padvec(BITVEC *vp);

/* Enlarge vector */
void expandvec(BITVEC *vp);

/*Load a vector from a file with at a certain offset (don't call initvec,
  but initialise vector on creation (eg. BITVEC vec={NULL,0,0,0,0,0,0}),
 otherwise the BITVEC will be malloc'd twice)*/
int loadvector(long fileoffset, FILE *fpVectors, BITVEC *vp);

/** Copy one vector to another for debugging purposes**/
BITVEC *VecCpy(BITVEC *VecDest, BITVEC *VecSrc);

/* --------------------- Golomb Code ---------------------- */
/**Compute the golomb parameter incl. check :) **/
#define GolParam(k, x1, x2) \
  if((k=(long)(0.69 * ((double)(x1) / (double) x2)))<1) k=1;

/* structure for b and q values in golomb decoding */
/* This structure is used to cache log base 2
   results for faster lookup later on **/
typedef struct golhashroot
{
 int iSize;
 int *ipGolHashTable;

} GOLHASH_ROOT;


/*- structure for b, q, and d values in golomb decoding
 - q and d are funcs of b, so we compute q,d whenever we compute b */
typedef struct golombparamsrec
{
 int b, q, d;
} GOLOMBPARAMS;

/*
 GetGolomb coding version 1 and 2 are used to compute
 Golomb Codes in an accelerated fashion
 -> getgolomb1 compute coding parameters: b,d,q
 -> getgolomb1 get Golomb code from vector vp
*/
int getgolomb1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, GOLOMBPARAMS *pgPar);
int getgolomb2(BITVEC *vp, GOLOMBPARAMS *gPar);

/*
 PutGolomb coding version 1 and 2 are used to compute
 Golomb Codes in an accelerated fashion
 -> putgolomb1 compute coding parameters: b,d,q
 -> putgolomb1 get Golomb code from vector vp
*/
int putgolomb1(int iItem, int iCount, GOLOMBPARAMS *pgPar, GOLHASH_ROOT *pGolHashTable);
int putgolomb2(BITVEC *vp, int x, GOLOMBPARAMS gPar);

int GetMaxB_OffsetFromGolomb();

/* Standard Golomb ** x = value to be put in, b = coding parameter */
int putgolomb(BITVEC *vp, int x, int b);

/**Call this before using getgolomb -> Will Return false if an error occured !!!**/
int initgolhash(GOLHASH_ROOT *rpGolHashRoot, int iSize);

/* Standard Golomb, b = coding parameter */
int getgolomb(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot);

/*<---------------------- End of Golomb Code -------------------->*/

typedef struct riceparamsrec {
 int b, logb;
} RICEPARAMS;

/* Standard Rice encoding.
    NOTE.  b must be a non-negative power of 2 or this will fail. */
int putrice(BITVEC *vp, int x, int b);

/* Standard Rice decoding.
    NOTE.  b must be a non-negative power of 2 or this will fail. */
int getrice(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot);

int getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashTable, RICEPARAMS *prPar);
int getrice2(BITVEC *vp, RICEPARAMS *rPar);
int scanrice2(BITVEC *vp, RICEPARAMS *rPar);
/////////////声明






 

本文地址:http://com.8s8s.com/it/it25313.htm