Development: Embedded BWT
From WxWiki
Below is a Burrows-Wheeler Transform algorithm implemented specifically for embedded machines. See Compression Methods for more info.
// fixed size BWT+RLE+EGC+MTF compression designed for embedded systems (e.g. Gameboy) // BWT = Block Sorting // RLE = Run Length Encoding // ECG = Elias Gamming Coding // MTF = Move To Front // // Slow but requires minimal ram // // Version 0.04 -- Nov 9th 2002 // -- Sped up decompression more // // version 0.03 -- Nov 7th 2002 // -- requires now 1KB + 4*SIZE bytes (0.5KB more than before) // -- Uses improved bin based qsort so that compression takes less time // -- Uses unsigned types to handle 32KB blocks // -- Uses fixed qsort // // Version 0.02 -- Nov 2nd 2002 // -- Updated MTF code to be more efficient (less mem moves) // // // Version 0.01 -- Nov 1st 2002. // -- Updated decompression routines that are faster/smaller // // Tom St Denis, tomstdenis@yahoo.com #include <stdlib.h> #include <string.h> #include <stdio.h> // size of block compressed (bytes) must be power of two // on a 32-bit platform the following ram is required // // Requires 1KB + 4*SIZE bytes of ram to work and about 1kb of stack. #define SIZE 8192 // must alloc ram for these at runtime, each are arrays of SIZE elements unsigned char *bwt_blk, *bwt_sblk, *bwt_out; // these are the work buffers required // if you leave them as null compress()/decompress() will alloc/free ram as required // this lets you fix the address for embedded devices unsigned short *bwt_wrk_mem = NULL; // must have SIZE elements unsigned short *bwt_tab = NULL; // must have 256 elements unsigned short *bwt_tab2 = NULL; // must have 256 elements unsigned char *bwt_ctab; // (setup automatically) // qsort function that is not-recursive #define T 7 #define swap(x, y) { unsigned short t; t = *((unsigned short *)(x)); \ *((unsigned short *)(x)) = *((unsigned short *)(y)); *((unsigned short *)(y)) = t; } static int comp_helper(const void *A, const void *B) { unsigned short a, b; a = *((short *)A); b = *((short *)B); return memcmp(bwt_blk+a+1, bwt_blk+b+1, SIZE-1); } /* main quicksort function */ static void bwt_qsort(unsigned char *base, unsigned elements, unsigned size) { unsigned char *stack[40], **sp; /* stack and stack pointer */ unsigned char *i, *j, *limit; /* scan and limit pointers */ unsigned thresh; /* size of T elements in bytes */ thresh = size*T; /* init threshold */ sp = stack; /* init stack pointer */ limit = base+elements*size; /* pointer past end of array */ for(;;) { /* repeat until break... */ if((unsigned)limit - (unsigned)base > thresh) { /* if more than T elements */ /* swap base with middle */ swap(((((unsigned)limit-(unsigned)base)/size)/2)*size+base, base); i = base + size; /* i scans left to right */ j = limit - size; /* j scans right to left */ if(comp_helper(i, j) > 0) /* Sedgewick's */ swap(i, j); /* three-element sort */ if(comp_helper(base, j) > 0) /* sets things up */ swap(base, j); /* so that */ if(comp_helper(i, base) > 0) /* *i <= *base <= *j */ swap(i, base); /* *base is pivot element */ for(;;) { /* loop until break */ do { /* move i right */ i += size; /* until *i >= pivot */ } while(comp_helper(i, base) < 0); do { /* move j left */ j -= size; /* until *j <= pivot */ } while(comp_helper(j, base) > 0); if(i > j) /* if pointers crossed */ break; /* break loop */ swap(i, j); } /* else swap elements,keep scanning*/ swap(base, j); /* move pivot into correct place */ if((unsigned)j - (unsigned)base > limit - i) { /* if left subfile larger */ sp[0] = base; /* stack left subfile base */ sp[1] = j; /* and limit */ base = i; } else { /* sort the right subfile */ /* else right subfile larger */ sp[0] = i; /* stack right subfile base */ sp[1] = limit; /* and limit */ limit = j; } /* sort the left subfile */ sp += 2; /* increment stack pointer */ } else { /* else subfile is small, use insertion sort */ for(j = base, i = j+size; i < limit; j = i, i += size) while(comp_helper(j, j+size) > 0) { swap(j, j+size); if(j == base) break; j -= size; } if(sp == stack) /* No entries - done! */ break; sp -= 2; /* pop the base and limit */ base = sp[0]; limit = sp[1]; } } } // BIT routines static void putbit(long *k, long *bitcnt, long *bitbuf, long b) { *bitbuf |= b; if (--(*bitcnt) == 0) { bwt_out[(*k)++] = *bitbuf; *bitcnt = 8; *bitbuf = 0; } else { *bitbuf <<= 1; } } static void putbyte(long *k, long *bitcnt, long *bitbuf, long b) { long i; for (i = 0; i < 8; i++) { putbit(k, bitcnt, bitbuf, (b<<i)&0x80?1:0); } } static long getbit(long *k, long *bitcnt, long *bitbuf) { long x; if ((*bitcnt)-- == 0) { *bitcnt = 7; *bitbuf = bwt_blk[(*k)++]; } x = (*bitbuf >> *bitcnt) & 1; return x; } static long getbyte(long *k, long *bitcnt, long *bitbuf) { long x, y; for (x = y = 0; x < 8; x++) { y |= getbit(k, bitcnt, bitbuf)<<(7-x); } return y; } static void bwt_emit_len(long *k, long *bitcnt, long *bitbuf, long len) { long x, diff, bits; /* Elias Gamma Code 1 1 1 01x 2-3 3 001xx 4-7 5 0001xxx 8-15 7 00001xxxx 16-31 9 000001xxxxx 32-63 11 0000001xxxxxx 64-127 13 ... */ // bits equals the 2^n such that 2^n+1 is > len and 2^n <= len for (bits = 0; (1<<bits) <= len; ++bits); --bits; // output the 0's for (x = 0; x < bits; x++) putbit(k, bitcnt, bitbuf, 0); putbit(k, bitcnt, bitbuf, 1); diff = len - (1<<bits); for (x = 0; x < bits; x++) { putbit(k, bitcnt, bitbuf, (diff>>x)&1); } } static long bwt_read_len(long *k, long *bitcnt, long *bitbuf) { long bits, x, y; bits = 0; while (getbit(k, bitcnt, bitbuf) == 0) { ++bits; } y = 1<<bits; for (x = 0; x < bits; x++) { y += getbit(k,bitcnt,bitbuf)<<x; } return y; } static void mtf_encode(long *k, long *bitcnt, long *bitbuf, unsigned char v, int use_mtf) { long x, y; if (use_mtf) { // find 'v' in bwt_tab for (x = 0; x < 256; x++) { if (bwt_ctab[x] == v) { break; } } // emit code for position x bwt_emit_len(k, bitcnt, bitbuf, x+1); // now move 'v' to the front for (y = x; y > 0; y--) { bwt_ctab[y] = bwt_ctab[y-1]; } bwt_ctab[0] = v; } else { putbyte(k, bitcnt, bitbuf, v); } } static unsigned char mtf_decode(long *k, long *bitcnt, long *bitbuf, int use_mtf) { long x, y, v; if (use_mtf) { x = bwt_read_len(k, bitcnt, bitbuf)-1; v = bwt_ctab[x]; // move 'v' to front for (y = x; y > 0; y--) { bwt_ctab[y] = bwt_ctab[y-1]; } bwt_ctab[0] = v; return v; } else { return getbyte(k, bitcnt, bitbuf); } } // determines if MTF is useful or not static int mtf_profile(void) { long x, y, z, v, last, b_mtf, b_nmtf; // setup MTF for (x = 0; x < 256; x++) bwt_ctab[x] = x; b_mtf = b_nmtf = last = 0; for (x = 0; x < SIZE; x++) { if (bwt_blk[(bwt_wrk_mem[x]-1)&(SIZE-1)] != last) { b_nmtf += 8; // find char v = bwt_blk[(bwt_wrk_mem[x]-1)&(SIZE-1)]; for (y = 0; y < 256; y++) { if (bwt_ctab[y] == v) { break; } } ++y; // calc weight of position if (y == 1) { ++b_mtf; } else if (y <= 3) { b_mtf += 3; } else if (y <= 7) { b_mtf += 5; } else if (y <= 15) { b_mtf += 7; } else if (y <= 31) { b_mtf += 9; } else if (y <= 63) { b_mtf += 11; } else if (y <= 127) { b_mtf += 13; } else if (y <= 255) { b_mtf += 15; } else if (y <= 511) { b_mtf += 17; } --y; // sort MTF tab // now move 'v' to the front for (z = y; z > 0; z--) { bwt_ctab[z] = bwt_ctab[z-1]; } bwt_ctab[0] = v; last = v; } } if (b_mtf > b_nmtf) { return 0; } else { return 1; } } static void bwt_decompress(void) { long i, j, k, off, last, run, bitcnt, bitbuf, use_mtf; // get offset off = bwt_blk[0] | ((unsigned)bwt_blk[1] << 8); // now RLE decompress k = 2; bitcnt = 0; // get mtf bit use_mtf = getbit(&k, &bitcnt, &bitbuf); // setup MTF table if (use_mtf) { for (i = 0; i < 256; i++) bwt_ctab[i] = i; } // sort bwt_sblk using the bucket sort memset(bwt_tab2, 0, 256*sizeof(short)); for (j = 0; j < SIZE; ) { run = bwt_read_len(&k, &bitcnt, &bitbuf); last = mtf_decode(&k, &bitcnt, &bitbuf, use_mtf); memset(bwt_sblk+j, last, run); bwt_tab2[last] += run; j += run; } // copy into bwt_blk memcpy(bwt_blk, bwt_sblk, SIZE); // now fill (bucket sort) memset(bwt_tab, 0, sizeof(short) * 256); for (i = j = k = 0; i < 256; i++) { bwt_tab[i] = k; j = bwt_tab2[i]; if (j > 0) { memset(bwt_sblk+k, i, j); k += j; } } // setup vectors to -1 each memset(bwt_wrk_mem, 0xFF, sizeof(short) * SIZE); // now do search for (i = 0; i < SIZE; i++) { // use in table once its found once j = bwt_tab[bwt_blk[i]]; // look for bwt_blk[i] in bwt_sblk[] for (; j < SIZE; j++) { if (bwt_blk[i] == bwt_sblk[j] && bwt_wrk_mem[j] == (unsigned short)-1) { bwt_wrk_mem[j] = i; bwt_tab[bwt_blk[i]] = j+1; break; } } } // copy back for (i = 0; i < SIZE; i++) { bwt_out[i] = bwt_blk[off]; off = bwt_wrk_mem[off]; } } static long bwt_compress(void) { long i, k, last, run, off, bitbuf, bitcnt, use_mtf; // count each byte memset(bwt_tab, 0, sizeof(short) * 256); for (i = 0; i < SIZE; i++) { ++bwt_tab[bwt_blk[i]]; } // make offsets into wrk mem for each for (k = i = 0; i < 256; i++) { bwt_tab2[i] = k; k += bwt_tab[i]; } // now copy each index by byte for (i = 0; i < SIZE; i++) { bwt_wrk_mem[bwt_tab2[bwt_blk[i]]++] = i; } // now sort each bin for (k = i = 0; i < 256; i++) { if (bwt_tab[i]) { bwt_qsort((unsigned char *)(bwt_wrk_mem + k), bwt_tab[i], sizeof(short)); k += bwt_tab[i]; } } // now find first index for (off = i = 0; i < SIZE; i++) { if (bwt_wrk_mem[i] == 1) { off = i; break; } } // RLE compress bwt_out[0] = off&255; bwt_out[1] = off>>8; last = 0; run = 0; k = 2; bitcnt = 8; bitbuf = 0; use_mtf = mtf_profile(); putbit(&k, &bitcnt, &bitbuf, use_mtf); if (use_mtf) { // setup MTF table for (i = 0; i < 256; i++) bwt_ctab[i] = i; } for (i = 0; i < SIZE; i++) { if (last != bwt_blk[(bwt_wrk_mem[i]-1)&(SIZE-1)]) { if (run) { if (k > SIZE-16) return -1; bwt_emit_len(&k, &bitcnt, &bitbuf, run); mtf_encode(&k, &bitcnt, &bitbuf, last, use_mtf); } run = 1; last = bwt_blk[(bwt_wrk_mem[i]-1)&(SIZE-1)]; } else { ++run; } } if (run) { if (k > SIZE-16) return -1; bwt_emit_len(&k, &bitcnt, &bitbuf, run); mtf_encode(&k, &bitcnt, &bitbuf, last, use_mtf); } if (bitcnt != 8) { for (i = 0; i < 8; i++) putbit(&k, &bitcnt, &bitbuf, 0); } return k; } // helper routines // sizes... // in : 2*SIZE bytes // stores result in upper half of in[] long compress(unsigned char *in) { long len, n; // if any of the global points is null... if (bwt_wrk_mem == NULL || bwt_tab == NULL) { n = 1; bwt_wrk_mem = malloc(sizeof(short) * SIZE); bwt_tab = malloc(sizeof(short) * 256); bwt_tab2 = malloc(sizeof(short) * 256); } else { n = 0; } // setup bwt_blk and copy bwt_blk = in; // input memcpy(bwt_blk+SIZE, in, SIZE); // 2nd copy bwt_out = in+SIZE; // setup output bwt_ctab = (unsigned char *)bwt_tab; // MTF tab len = bwt_compress(); if (n == 1) { free(bwt_wrk_mem); free(bwt_tab); free(bwt_tab2); bwt_wrk_mem = NULL; bwt_tab = NULL; bwt_tab2 = NULL; } return len; } // in must be SIZE bytes // out must be SIZE bytes void decompress(unsigned char *in, unsigned char *out) { long n; // if any of the global points is null... if (bwt_wrk_mem == NULL || bwt_tab == NULL || bwt_tab2 == NULL) { n = 1; bwt_wrk_mem = malloc(sizeof(short) * SIZE); bwt_tab = malloc(sizeof(short) * 256); bwt_tab2 = malloc(sizeof(short) * 256); } else { n = 0; } bwt_blk = in; bwt_out = bwt_sblk = out; bwt_ctab = (unsigned char *)bwt_tab; // MTF tab bwt_decompress(); if (n == 1) { free(bwt_wrk_mem); free(bwt_tab); free(bwt_tab2); bwt_wrk_mem = NULL; bwt_tab = NULL; bwt_tab2 = NULL; } } // // DEMO for platforms with a console // #ifdef DEMO #include <time.h> int main(void) { unsigned char src[SIZE+SIZE], res[SIZE]; int x; clock_t t1; // make up a buffer memset(src, 0, sizeof(src)); fread(src, 1, SIZE-1, in); // COMPRESSION t1 = clock(); x = compress(src); printf("Took %lu ticks\n", clock() - t1); printf("Compressed %d downto %d bytes\n", SIZE, x); if (x == -1) { printf("Data not compressed.\n"); return 0; } // DECOMPRESSION t1 = clock(); memset(res, 0, sizeof(res)); decompress(src+SIZE, res); // decompresses bwt_blk to bwt_out printf("Took %lu ticks\n", clock() - t1); // result is in bwt_out[0..SIZE-1] printf("Decompress Res: {%s}\nCompare: %d\n", res, memcmp(res, src, SIZE)); return 0; } #endif
