-h- lz.h Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZ.H;72 /* * Header file for all lz compression/decompression routines. * * Machine/Operating system/compiler selection: (#ifdef'ed) * vax Vax/Unix or Vax/VMS * pdp11 makes a small compressor * M_XENIX "large-model" Z8000 * interdata Signed long compare is slow * unix Defined on true Unix systems * decus Decus C (no signal) * vms Vax/VMS (VMS_V4 may be set automatically) * #define readonly If the compiler doesn't support it correctly. * * Compiler configuration (#if'ed): * #define vax_asm TRUE/FALSE TRUE on Vax (4bsd) if the compiler supports * the asm() operator. Check the generated code! * #define UCHAR TRUE/FALSE TRUE if compiler supports unsigned char * #define DEBUG TRUE/FALSE TRUE to compile in debug printouts * * Algorithm Tuning parameters: * #define USERMEM Memory available to compress. * If large enough, a faster algorithm is used. * #define SACREDMEM Don't use this part of USERMEM. * #define BITS Maximum number of code bits. * #define MAXIO Output buffer size (squeeze memory if needed) */ #include #include #include #ifndef decus # include /* * Arguments to signal(): */ extern int abort(); /* Debugging interrupt trap */ extern int interrupt(); /* Non-debugging interrupt trap */ extern int address_error(); /* "Segment" violation */ #endif #ifndef TRUE # define FALSE 0 # define TRUE 1 #endif #ifndef EOS # define EOS '\0' #endif #define streq(a, b) (strcmp((a), (b)) == 0) #define min(a,b) ((a) > (b)) ? (b) : (a)) /* * Set USERMEM to the maximum amount of physical user memory available * in bytes. USERMEM is used to determine the maximum BITS that can be used * for compression. * * SACREDMEM is the amount of physical memory saved for others; compress * will hog the rest. 
*/
#ifndef SACREDMEM
# define SACREDMEM 0
#endif

/*
 * Set machine-specific parameters
 */
#ifdef vax
# ifdef unix
# define vax_asm TRUE /* If asm() supported on vax */
# endif
#endif
#ifndef vax_asm
# define vax_asm FALSE
#endif
/*
 * The pdp11 settings pre-empt the generic defaults below, because each
 * default is wrapped in #ifndef.
 */
#ifdef pdp11
# define BITS 12 /* max bits/code for 16-bit machine */
# define USERMEM 0 /* Force no user memory */
# define UCHAR FALSE /* TRUE if compiler supports unsigned char */
# define MAXIO 512 /* Buffer size for PDP-11 I/O buffers */
#endif

/*
 * Set default values for some parameters.
 * Each one applies only if nothing above (or the compiler command line)
 * has already defined the name.
 */
#ifndef DEBUG
# define DEBUG FALSE
#endif
#ifdef interdata
# define SIGNED_COMPARE_SLOW TRUE
#endif
#ifndef SIGNED_COMPARE_SLOW
# define SIGNED_COMPARE_SLOW FALSE
#endif
#ifndef USERMEM
# define USERMEM 750000 /* default user memory */
#endif
#ifndef UCHAR
# define UCHAR TRUE /* Compiler supports unsigned char */
#endif
#ifndef MAXIO
# define MAXIO 2048 /* I/O buffer size */
#endif

/*
 * Set derived tuning parameters.
 *
 * PBITS is the largest code width whose tables fit in
 * USERMEM - SACREDMEM; the thresholds step down from 16 bits to 12.
 */
/* USERMEM is always defined by this point (defaulted above), so this
 * guard never fires; it is kept as a safety net. */
#ifndef USERMEM
# define USERMEM 0
#endif
#if USERMEM >= (433484 + SACREDMEM)
# define PBITS 16
#else
# if USERMEM >= (229600 + SACREDMEM)
# define PBITS 15
# else
# if USERMEM >= (127536 + SACREDMEM)
# define PBITS 14
# else
# if USERMEM >= ( 73464 + SACREDMEM)
# define PBITS 13
# else /* Smaller systems */
# define PBITS 12
# endif
# endif
# endif
#endif
/* An explicit BITS (command line or the pdp11 block) wins over PBITS. */
#ifndef BITS
# define BITS PBITS
#endif
/* On Xenix, BITS values of 14 and 15 are forced down to 13. */
#ifdef M_XENIX
# if BITS >= 16
# define XENIX_16 /* Enable special vector access macros */
# else
# if BITS > 13
# undef BITS
# define BITS 13 /* Code only handles BITS = 12, 13, 16 */
# endif
# endif
#endif

/*
 * HSIZE is the size of the hash lookup table.  It is set to
 * 1 << BITS + fudge factor, rounded up to a prime number.
 * If it is too big, the "clear the hash" routine will take
 * too long.  The same numbers are replicated in the getsize()
 * routine's data table.
*/ #if BITS == 16 # define HSIZE 69001 /* 95% occupancy */ #endif #if BITS == 15 # define HSIZE 35023 /* 94% occupancy */ #endif #if BITS == 14 # define HSIZE 18013 /* 91% occupancy */ #endif #if BITS == 13 # define HSIZE 9001 /* 91% occupancy */ #endif #if BITS <= 12 # define HSIZE 5003 /* 80% occupancy */ #endif /* * typedef's -- somewhat machine specific. */ /* * a code_int must be able to hold 2**BITS values of type int, and also -1 */ #if BITS > 15 typedef long int code_int; #else typedef int code_int; #endif /* * A count_int must hold ((2**BITS)-1) + (255<> #endif #define LZ_CLEAR (NBR_CHAR) /* Clear code */ #define LZ_SOH (LZ_CLEAR + 1) /* Start of header block */ #define LZ_STX (LZ_SOH + 1) /* Start of text block */ #define LZ_EOR (LZ_STX + 1) /* End of text record */ #define LZ_ETX (LZ_EOR + 1) /* End of header/text block */ #define LZ_FIRST (LZ_ETX + 1) /* First user (data) code */ #ifdef vms #include errno #include ssdef #include stsdef #define IO_SUCCESS (SS$_NORMAL | STS$M_INHIB_MSG) #define IO_ERROR (SS$_ABORT) #define VMS_V4 L_cuserid >= 16 /* Enable new stuff */ #else #define VMS_V4 0 /* Disable new stuff */ extern int errno; #ifdef decus #define errno $$ferr #endif #endif /* * Define exit() codes. */ #ifndef IO_SUCCESS #define IO_SUCCESS 0 /* Normal exit */ #define IO_ERROR 1 /* Error exit */ #endif /* * All I/O is done by way of "streams". To establish a stream, * set the parameters appropriately and off you go. The following * functions are provided: * lz_fill(stream) fills the buffer from stdin * lz_flush(stream) writes the buffer to stdout * lz_eof(stream) returns EOF (for fill from memory) * lz_fail(stream) abort (for writing to memory). * lz_dummy(stream) throw an output stream away. * Note: if VMS_V4 is enabled and the private (non-export) format * chosen, lz_fill and lz_flush access the files appropriately. 
* Stream elements are initialized as follows:
 *	Input:	bp = NULL;	bend = NULL;
 *	Output:	bp = bstart;	bend = bstart + bsize;
 */
typedef struct STREAM {
    char_type *bp; /* Next character to get/put */
    char_type *bend; /* -> end of stream buffer */
    char_type *bstart; /* Start of stream buffer */
    short bsize; /* Stream buffer size */
    int (*func)(); /* Read/write a buffer function */
} STREAM;

/*
 * Note also that the compress routine uses putbuf(buf, count, outstream)
 * and the decompress routine uses getbuf(buf, count, instream) to (quickly)
 * transfer multiple bytes.
 *
 * GET(s) yields the next buffered byte, or, when bp has reached bend,
 * whatever the stream's func returns.  When UCHAR is false the byte is
 * masked with 0xFF.
 *
 * PUT(c, s) first calls the stream's func if bp has reached bend
 * (discarding its result), then stores c at bp and advances bp.
 *
 * Both macros evaluate s several times -- pass a plain pointer.
 */
#if UCHAR
#define GET(s) \
	(((s)->bp < (s)->bend) ? *(s)->bp++ : (*(s)->func)(s))
#else
#define GET(s) \
	(((s)->bp < (s)->bend) ? *(s)->bp++ & 0xFF : (*(s)->func)(s))
#endif
#define PUT(c, s) \
	((((s)->bp >= (s)->bend) ? (*(s)->func)(s) : 0), *(s)->bp++ = (c))

/* Stream buffer functions, suitable for STREAM.func. */
extern int lz_fill();
extern int lz_flush();
extern int lz_eof();
extern int lz_fail();
extern int lz_dummy();
#if DEBUG
extern readonly char *lz_names[]; /* "LZ_CLEAR" etc. */
#endif

/*
 * Options and globals.
 */
#if VMS_V4
#define ATT_NAME "vms$attributes "
#define ATT_SIZE 15 /* strlen(ATT_NAME) */
extern int fdl_status; /* Error code from fdl library */
#endif
extern flag binary; /* -b Readable text file if FALSE */
extern flag noheader; /* -x3 No magic header if TRUE */
extern flag export; /* -x (non-zero) Suppress vms private */
extern flag block_compress; /* -x2 */
extern flag verbose; /* -v (non-zero) Verbose logging */
extern readonly flag is_compress; /* TRUE if compress, FALSE if decomp. */
extern char *infilename; /* For error printouts */
extern char *outfilename; /* For more error printouts */
extern short n_bits; /* Current # of bits in compressed file */
extern int firstcode; /* First value past signals */
extern jmp_buf failure; /* For longjmp() return */
-h- lzio.c Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZIO.C;15
/*
 * l z i o . c
 *
 * I/O buffer management.
All input/output I/O is done through these * routines (and the macros in lz.h). The rules of the game are: * * input via GET() and getbuf(). * GET returns an 8-bit byte, or -1 on eof/error. * getbuf() returns the number of things gotten, or -1 on eof/error. * No return on error: longjmp's to the main-line. * * output via PUT() and lz_putbuf(). * No return on error: longjmp's to the main-line. * flush output by lz_flush() before closing files -- or you'll lose data. */ #include "lz.h" #if VMS_V4 #include #ifndef FDLSTUFF #define FDLSTUFF char #endif extern FDLSTUFF *fdl_input; extern FDLSTUFF *fdl_output; extern int fdl_status; #endif int lz_fill(s) register STREAM *s; { register int i; extern char *infilename; #if VMS_V4 if (export && is_compress) { i = fread((char *) s->bstart, 1, s->bsize, stdin); if (ferror(stdin)) { perror(infilename); FAIL("export && is_compress fread error"); } } else { /* Decompress and export/private */ i = fdl_read(s->bstart, s->bsize, fdl_input); if (i < 0 && fdl_status != RMS$_EOF) fdl_message(fdl_input, "Read error"); } #else #ifdef unix i = read(fileno(stdin), (char *) s->bstart, s->bsize); if (i < 0) { perror(infilename); FAIL("unix read error"); } #else i = fread((char *) s->bstart, 1, s->bsize, stdin); if (ferror(stdin)) { perror(infilename); exit(IO_ERROR); } #endif #endif if (i <= 0) return (EOF); else { s->bp = s->bstart; s->bend = &s->bstart[i]; #if UCHAR return (*s->bp++); #else return (*s->bp++ & 0xFF); #endif } } lz_flush(s) register STREAM *s; { register int count; extern char *outfilename; count = s->bp - s->bstart; #if DEBUG if (!is_compress && verbose > 4) { fprintf(stderr, "lz_flush %d: ", count); dumptext(s->bstart, count, stderr); } #endif #if VMS_V4 if (export) { if (is_compress) fwrite((char *) s->bstart, count, 1, stdout); else { register char *bp, *bend; for (bp = s->bstart, bend = bp + count; bp < bend; bp++) putchar(*bp); } if (ferror(stdout)) { perror(outfilename); FAIL("VMS V4 fwrite/putchar error"); } } else { 
if (fdl_write((char *) s->bstart, count, fdl_output) == -1) { fdl_message(fdl_output, "Write error"); FAIL("VMS V4 fdl_write error"); } } #else #ifdef unix if (write(fileno(stdout), (char *) s->bstart, count) != count) { perror(outfilename); fprintf(stderr, "Can't write to \"%s\"\n", outfilename); FAIL("Unix write error"); } #else fwrite((char *) s->bstart, 1, count, stdout); if (ferror(stdout)) { perror(outfilename); FAIL("Other (decus) fwrite error"); } #endif #endif s->bp = s->bstart; } int lz_getbuf(buffer, count, s) char_type *buffer; int count; register STREAM *s; /* * Read a block of data -- be clever. Return number gotten, or -1 * on eof. */ { register char_type *bp; /* -> buffer */ register char_type *ip; /* -> I/O buffer */ register char_type *ep; /* End of segment */ register int remaining; /* Size of segment */ int datum; if (count == 0) /* Shouldn't happen */ return (0); bp = buffer; while (--count >= 0) { if ((datum = GET(s)) == EOF) /* Maybe fill LZ buff */ break; *bp++ = datum; remaining = s->bend - (ip = s->bp); if (remaining > count) remaining = count; ep = &ip[remaining]; while (ip < ep) *bp++ = *ip++; count -= remaining; s->bp = ip; /* Refresh buffer */ } return ((bp == buffer) ? -1 : bp - buffer); } int lz_putbuf(bp, count, s) register char_type *bp; int count; register STREAM *s; /* * Write a block of data -- be clever. */ { register char_type *op; /* -> I/O buffer */ register char_type *ep; /* End of segment */ register int remaining; /* Size of segment */ while (--count >= 0) { PUT(*bp++, s); /* Forces a buffer */ remaining = s->bend - (op = s->bp); if (remaining > count) remaining = count; ep = &op[remaining]; while (op < ep) *op++ = *bp++; count -= remaining; s->bp = op; /* Refresh buffer */ } } int lz_eof(s) STREAM *s; /* * Dummy routine for read from memory -- returns EOF. */ { return (s, EOF); } int lz_fail(s) STREAM *s; /* * Dummy routine for write to memory -- called if buffer fills. 
*/ { fprintf(stderr, "Memory buffer [%d bytes] filled -- fatal.\n", s->bsize); FAIL("lz_fail crash"); } int lz_dummy(s) STREAM *s; /* * Dummy routine for write to memory -- writes to the bit-bucket. */ { s->bp = s->bstart; } #ifndef decus /* * Signal error handlers. */ #ifdef vms #define unlink delete #endif interrupt() { if (outfilename != NULL && !streq(outfilename, "")) unlink(outfilename); exit(IO_ERROR); } address_error() { if (!is_compress) fprintf(stderr, "Decompress: corrupt input file\n"); interrupt(); } #endif /* * getredirection() is intended to aid in porting C programs * to VMS (Vax-11 C) which does not support '>' and '<' * I/O redirection. With suitable modification, it may * useful for other portability problems as well. */ #ifdef vms int getredirection(argc, argv) int argc; char **argv; /* * Process vms redirection arg's. Exit if any error is seen. * If getredirection() processes an argument, it is erased * from the vector. getredirection() returns a new argc value. * * Warning: do not try to simplify the code for vms. The code * presupposes that getredirection() is called before any data is * read from stdin or written to stdout. * * Normal usage is as follows: * * main(argc, argv) * int argc; * char *argv[]; * { * argc = getredirection(argc, argv); * } */ { register char *ap; /* Argument pointer */ int i; /* argv[] index */ int j; /* Output index */ int file; /* File_descriptor */ for (j = i = 1; i < argc; i++) { /* Do all arguments */ switch (*(ap = argv[i])) { case '<': /* ': /* >file or >>file */ if (*++ap == '>') { /* >>file */ /* * If the file exists, and is writable by us, * call freopen to append to the file (using the * file's current attributes). Otherwise, create * a new file with "vanilla" attributes as if * the argument was given as ">filename". * access(name, 2) is TRUE if we can write on * the specified file. 
*/ if (access(++ap, 2) == 0) { if (freopen(ap, "a", stdout) != NULL) break; /* Exit case statement */ perror(ap); /* Error, can't append */ exit(IO_ERROR); /* After access test */ } /* If file accessable */ } /* * On vms, we want to create the file using "standard" * record attributes. create(...) creates the file * using the caller's default protection mask and * "variable length, implied carriage return" * attributes. dup2() associates the file with stdout. */ if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1 || dup2(file, fileno(stdout)) == -1) { perror(ap); /* Can't create file */ exit(IO_ERROR); /* is a fatal error */ } /* If '>' creation */ break; /* Exit case test */ default: argv[j++] = ap; /* Not a redirector */ break; /* Exit case test */ } } /* For all arguments */ argv[j] = NULL; /* Terminate argv[] */ return (j); /* Return new argc */ } #endif #if 1 || DEBUG int col; readonly char *lz_names[] = { "LZ_CLEAR", "LZ_SOH", "LZ_STX", "LZ_EOR", "LZ_ETX", "???" }; dumphex(buffer, count, fd) register char_type *buffer; register int count; FILE *fd; { if (col > 0) { putc('\n', fd); col = 0; } fprintf(fd, "%2d:", count); while (--count >= 0) { fprintf(fd, " %02x", *buffer++ & 0xFF); } fprintf(fd, "\n"); } dumptext(buffer, count, fd) register char_type *buffer; int count; FILE *fd; { extern char *dumpchar(); putc('"', fd); while (--count >= 0) fputs(dumpchar((int) *buffer++), fd); fputs("\"\n", fd); } char * dumpchar(c) register int c; /* * Make a character printable. Returns a static pointer. */ { static char dump_buffer[8]; c &= 0xFF; if (isascii(c) && isprint(c)) { dump_buffer[0] = c; dump_buffer[1] = EOS; } else { switch (c) { case '\n': return ("\\n"); case '\t': return ("\\t"); case '\b': return ("\\b"); case '\f': return ("\\f"); case '\r': return ("\\r"); } sprintf(dump_buffer, "", c); } return (dump_buffer); } #endif /* * Cputime returns the elapsed process time (where available) in msec. 
* Note: Unix doesn't seem to have a good way to determine ticks/sec. */ #ifdef decus #include long cputime() { struct timeb buf; static struct timeb origin; long result; int msec; if (origin.time == 0) ftime(&origin); ftime(&buf); result = (buf.time - origin.time) * 1000; msec = ((int) buf.msec) - ((int) origin.msec); return (result + ((long) msec)); } #else #ifdef vms #include struct tms { time_t tms_utime; time_t tms_stime; time_t tms_uchild; /* forgot the */ time_t tms_uchildsys; /* real names */ }; #define HERTZ 100.0 /* 10 msec units */ #else #include #include #ifndef HERTZ #define HERTZ 60.0 /* Change for Europe */ #endif #endif long cputime() { struct tms tms; double temp; long result; times(&tms); result = tms.tms_utime + tms.tms_stime; temp = result * 1000.0 / HERTZ; /* Time in msec. */ result = temp; return (result); } #endif -h- lzvio.c Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZVIO.C;3 /* * l z v i o . c * For VMS V4 only. */ /* * Problems: * If you open a second input file (getting rms attributes) * it aborts with an internal "fatal" error (15820C LIB-F-FATERRLIB) */ /* * Make TESTING_FDLIO non-zero to enable test code. * * Edit History */ #ifndef TESTING_FDLIO #define TESTING_FDLIO 0 #endif /* * RMS/FDL record level i/o routines for Vax-11 C V4 or greater only. * Rather crude. * * The following are provided: * * #define FDLSTUFF char * #include descrip * * FDLSTUFF * * fdl_open(filename, fdl_descriptor) * char *filename; * struct dsc$descriptor *fdl_descriptor; * Initializes internal buffers and opens this existing * file for input. The filename may not contain wildcards. * On (successful) return, fdl_descriptor will point to * an initialized fdl specification. The description * string will be in malloc'ed memory. The caller does not * initialize the fdl_descriptor. Returns NULL on error. * (Note an error will be returned if the file is not * block-oriented.) 
* * When you don't need the fdl_descriptor information * any more, free it by calling * fdl_free(fdl_descriptor); * if fdl_descriptor is NULL on entry, the file is opened * normally (fdl information is not collected). * * FDLSTUFF * * fdl_create(fdl_descriptor, override_filename) * struct dsc$descriptor *fdl_descriptor; * char *override_filename; * Creates a file using the fdl specification. * If override_filename is not NULL and not equal to "", * it will override the filename specified in the fdl. * fdl_write() is used to write data to the file. * Returns NULL on error. * * if fdl_descriptor is NULL, the file is created using * the name in override_filename (which must be present). * The file is created in "undefined" record format. * * fdl_free(fdl_descriptor) * struct dsc$descriptor *fdl_descriptor; * Releases the fdl descriptor block. * * int * fdl_read(buffer, buffer_length, r) * char *buffer; * int buffer_length; * FDLSTUFF *r; * Read buffer_length bytes from the file (using SYS$READ). * No expansion or interpretation. buffer_length had * better be even or you're asking for trouble. Returns * the actual number of bytes read. The file has been * opened by fdl_open. * * int * fdl_write(buffer, buffer_length, r) * char *buffer; * int buffer_length; * FDLSTUFF *r; * Write buffer_length bytes to the file (using SYS$WRITE). * No expansion or interpretation. buffer_length had * better be even or you're asking for trouble. Returns * the actual number of bytes written. The file was opened * by fdl_create(); * * fdl_getname(r, buffer) * FDLSTUFF *r; * char *buffer; * Copies the currently open file's name to the caller's * data buffer buffer. * * long * fdl_fsize(r) * Returns the size in bytes of the opened file. * * fdl_dump(fdl_descriptor, fd) * struct dsc$descriptor *fdl_descriptor; * FILE *fd; * Writes the fdl info to the indicated file with * line breaks in appropriate places. 
* * fdl_message(r, why) * FDLSTUFF *r; * char *why; * All system-level routines set a global value, fdl_status. * fdl_message() prints the error message text corresponding * to the current value of fdl_status. The message printed * has the format: * why current_filename: error_message. * If why is NULL, only the error_message is printed. */ #include "lz.h" #if VMS_V4 #include rms #include ssdef #include descrip #include devdef #ifndef FDL$M_FDL_SIGNAL #define FDL$M_FDL_SIGNAL 1 /* Signal errors if set */ #endif #ifndef FDL$M_FDL_STRING #define FDL$M_FDL_STRING 2 /* Use string for fdl text */ #endif #if TESTING_FDLIO #define SIGNAL_ON_ERROR FDL$M_FDL_SIGNAL #else #define SIGNAL_ON_ERROR 0 #endif #define TRUE 1 #define FALSE 0 #define EOS 0 typedef struct FDLSTUFF { struct RAB rab; /* Record access buffer */ struct FAB fab; /* File access buffer */ struct NAM nam; /* File name buffer */ struct XABFHC xab; /* Extended attributes block */ char starname[NAM$C_MAXRSS + 1]; /* Wild file name */ char filename[NAM$C_MAXRSS + 1]; /* Open file name */ } FDLSTUFF; int fdl_status; /* Set to last rms call status */ static FDLSTUFF * fail(r, why, name) FDLSTUFF *r; /* Buffer */ char *why; /* A little commentary */ char *name; /* Argument to perror */ /* * Problem exit routine */ { #if TESTING_FDLIO if (name == NULL && r != NULL) name = r->fab.fab$l_fna; message(r, why, name); #endif if (r != NULL) free(r); return (NULL); } FDLSTUFF * fdl_open(filename, fdl_descriptor) char *filename; /* What to open */ struct dsc$descriptor *fdl_descriptor; /* Result descriptor */ /* * Open the file. Returns NULL on failure, else a pointer to RMS stuff. * Which is equivalently a pointer to the RAB. (Note that the RAB points * in turn to the FAB.) * * Return the file's fdl descriptor in the user-supplied (uninitialized) * descriptor. 
*/ { register FDLSTUFF *r; int retlen; int badblk; struct FAB *fab_add; struct RAB *rab_add; static int flags = (FDL$M_FDL_STRING | SIGNAL_ON_ERROR); extern FDLSTUFF *fdl_setup(); if ((r = fdl_setup(filename)) == NULL) return (NULL); /* * Now open the file. */ r->fab.fab$b_fac = FAB$M_GET | FAB$M_BIO; /* Block I/O only */ if ((fdl_status = sys$open(&r->fab)) != RMS$_NORMAL) { return (fail(r, "opening file", NULL)); } if ((r->fab.fab$l_dev & DEV$M_REC) != 0) { fail(r, "Record only device"); fdl_close(r); return (NULL); } r->rab.rab$l_rop = RAB$M_BIO; /* Block I/O only */ if ((fdl_status = sys$connect(&r->rab)) != RMS$_NORMAL) return (fail(r, "connecting after open", NULL)); if (fdl_descriptor != NULL) { /* * Now, get the file attributes */ fdl_descriptor->dsc$w_length = 4096; fdl_descriptor->dsc$b_dtype = DSC$K_DTYPE_VT; fdl_descriptor->dsc$b_class = DSC$K_CLASS_D; fdl_descriptor->dsc$a_pointer = malloc(4096); fab_add = &r->fab; rab_add = &r->rab; if ((fdl_status = fdl$generate( &flags, &fab_add, &rab_add, 0, 0, fdl_descriptor, &badblk, &retlen)) != SS$_NORMAL) { fdl_free(fdl_descriptor); sys$close(&r->fab); return(fail(r, "getting fdl info", NULL)); } /* * Success, null-terminate fdl info and squeeze the block. */ fdl_descriptor->dsc$a_pointer[retlen] = EOS; fdl_descriptor->dsc$a_pointer = realloc(fdl_descriptor->dsc$a_pointer, retlen + 1); fdl_descriptor->dsc$w_length = retlen; } return (r); } FDLSTUFF * fdl_create(fdl_descriptor, override_filename) struct dsc$descriptor *fdl_descriptor; /* Result descriptor */ char *override_filename; /* What to open */ /* * Create the file, Returns NULL on failure, else a pointer to RMS stuff. * Which is equivalently a pointer to the RAB. (Note that the RAB points * in turn to the FAB.) The file is open for writing using fdl_write. * * Uses the filename in the descriptor block, or the override filename * if supplied (non-NULL and not == ""); * * If fdl_descriptor is NULL, the override_filename is opened normally. 
*/ { register FDLSTUFF *r; int retlen; int badblk; static int flags = (FDL$M_FDL_STRING | SIGNAL_ON_ERROR); struct dsc$descriptor newname; struct dsc$descriptor *newname_ptr; int fid_block[3]; char created_name[NAM$C_MAXRSS + 1]; struct dsc$descriptor created_name_des = { NAM$C_MAXRSS, DSC$K_DTYPE_T, DSC$K_CLASS_S, &created_name[0] }; extern FDLSTUFF *fdl_setup(); if (fdl_descriptor == NULL) { if ((r = fdl_setup(override_filename)) == NULL) return (NULL); r->fab.fab$b_fac = FAB$M_PUT | FAB$M_BIO; /* Block I/O only */ r->fab.fab$l_fop |= (FAB$M_NAM | FAB$M_SQO | FAB$M_BIO); r->fab.fab$b_org = FAB$C_SEQ; /* Sequential only */ r->fab.fab$b_rfm = FAB$C_UDF; /* Undefined format */ if ((fdl_status = sys$create(&r->fab)) & 01 == 0) return (fail(r, "creating (sys$create)")); goto exit; } if (override_filename == NULL || override_filename[0] == '\0') newname_ptr = NULL; else { newname_ptr = &newname; newname.dsc$w_length = strlen(override_filename); newname.dsc$b_dtype = DSC$K_DTYPE_T; newname.dsc$b_class = DSC$K_CLASS_S; newname.dsc$a_pointer = override_filename; } if ((fdl_status = fdl$create(fdl_descriptor, newname_ptr, /* New file name if any */ 0, /* Default filename */ &created_name_des, /* Resultant filename */ &fid_block[0], /* File ID block */ &flags, /* FDL flag bits */ 0, /* Statement number */ &retlen, /* Created name length */ 0, 0) /* Create status, stv */ ) & 01 == 0) { return(fail(NULL, "creating (fdl$create)", NULL)); } created_name[retlen] = '\0'; if ((r = fdl_setup(created_name)) == NULL) return (NULL); /* * Now, open the file for output. 
*/ r->fab.fab$b_fac = FAB$M_PUT | FAB$M_BIO; /* Block I/O only */ if ((fdl_status = sys$open(&r->fab)) != RMS$_NORMAL) { return (fail(r, "opening created file", NULL)); } exit: if ((r->fab.fab$l_dev & DEV$M_REC) != 0) { fail(r, "Record only device"); fdl_close(r); return (NULL); } r->rab.rab$l_rop = RAB$M_BIO; /* Block I/O only */ if ((fdl_status = sys$connect(&r->rab)) != RMS$_NORMAL) return (fail(r, "connecting after create", NULL)); return (r); } static FDLSTUFF * fdl_setup(filename) char *filename; /* * Initializes rms blocks and parses file name. Returns the * FDL data block on success, NULL on error. */ { register FDLSTUFF *r; if ((r = (char *)malloc(sizeof (FDLSTUFF))) == NULL) return (NULL); r->fab = cc$rms_fab; /* Preset fab, */ r->nam = cc$rms_nam; /* name block */ r->rab = cc$rms_rab; /* and record block */ r->xab = cc$rms_xabfhc; /* file header block */ r->fab.fab$l_nam = &r->nam; /* fab -> name block */ r->fab.fab$l_xab = &r->xab; /* fab -> file header */ r->fab.fab$l_fna = filename; /* Argument filename */ r->fab.fab$b_fns = strlen(filename); /* ... size */ r->rab.rab$l_fab = &r->fab; /* rab -> fab */ /* Stuff the name block */ r->nam.nam$l_esa = r->starname; /* Expanded filename */ r->nam.nam$b_ess = NAM$C_MAXRSS + 1; /* ... size */ r->nam.nam$b_rss = NAM$C_MAXRSS + 1; /* ... 
max size */ if ((fdl_status = sys$parse(&r->fab)) != RMS$_NORMAL) { return (fail(r, "parsing", filename)); } ((char *)r->nam.nam$l_esa)[r->nam.nam$b_esl] = EOS; r->fab.fab$l_fna = r->nam.nam$l_esa; /* File name */ r->fab.fab$b_fns = r->nam.nam$b_esl; /* Length */ r->fab.fab$l_fop |= FAB$M_NAM; /* Use name block */ return (r); } fdl_free(fdl_descriptor) struct dsc$descriptor *fdl_descriptor; /* * Release the descriptor */ { if (fdl_descriptor->dsc$a_pointer != NULL) { free(fdl_descriptor->dsc$a_pointer); fdl_descriptor->dsc$a_pointer = NULL; } } fdl_close(r) register FDLSTUFF *r; { if ((fdl_status = sys$close(&r->fab)) != RMS$_NORMAL) return(fail(r, "close", NULL)); free(r); } int fdl_read(buffer, buffer_length, r) char *buffer; /* Record */ int buffer_length; /* Record length */ register FDLSTUFF *r; /* Record info. */ /* * Read the next record from the file. Returns number of bytes read or * -1 on any error. fdl_status has the status. */ { r->rab.rab$l_ubf = buffer; r->rab.rab$w_usz = buffer_length; r->rab.rab$l_bkt = 0; if ((fdl_status = sys$read(&r->rab)) != RMS$_NORMAL) { #if TESTING_FDLIO if (fdl_status != RMS$_EOF) { fdl_message(r, "error return from sys$read"); sleep(1); } #endif return (-1); } return (r->rab.rab$w_rsz); } int fdl_write(buffer, buffer_length, r) char *buffer; /* Record */ int buffer_length; /* Record length */ register FDLSTUFF *r; /* Record info. */ /* * Write the next record to the file. Returns number of bytes written or * -1 on any error. fdl_status has the status. 
*/ { r->rab.rab$l_rbf = buffer; r->rab.rab$w_rsz = buffer_length; r->rab.rab$l_bkt = 0; if ((fdl_status = sys$write(&r->rab)) != RMS$_NORMAL) { #if TESTING_FDLIO fdl_message(r, "error return from sys$write"); sleep(1); #endif return (-1); } return (r->rab.rab$w_rsz); } fdl_getname(r, buffer) FDLSTUFF *r; /* File pointer */ char *buffer; /* Where to put it */ /* * Return current file name */ { strcpy(buffer, r->fab.fab$l_fna); return (buffer); } long fdl_fsize(r) FDLSTUFF *r; /* File pointer */ /* * Return current file size */ { return (((long) r->xab.xab$l_ebk * 512) + r->xab.xab$w_ffb); } fdl_message(r, why) FDLSTUFF *r; char *why; /* * Print error message */ { extern char *vms_etext(); if (why == NULL) { fprintf(stderr, "\n%s\n\n", vms_etext(fdl_status)); } else { fprintf(stderr, "\n%s%s%s: %s\n\n", why, (why[0] == EOS) ? "" : " ", (r == NULL) ? "" : r->fab.fab$l_fna, vms_etext(fdl_status)); } } static char errname[257]; /* Error text stored here */ static $DESCRIPTOR(err, errname); /* descriptor for error text */ static char * vms_etext(errorcode) int errorcode; { char *bp; short errlen; /* Actual text length */ lib$sys_getmsg(&errorcode, &errlen, &err, &15); /* * Trim trailing junk. */ for (bp = &errname[errlen]; --bp >= errname;) { if (isgraph(*bp) && *bp != ' ') break; } bp[1] = EOS; return(errname); } static message(r, why, name) FDLSTUFF *r; /* Buffer */ char *why; /* A little commentary */ char *name; /* File name */ /* * Print error message */ { fprintf(stderr, "\nRMS error %x when %s %s\n", fdl_status, why, (name == NULL) ? "" : name); fprintf(stderr, "\"%s\"\n", vms_etext(fdl_status)); } fdl_dump(fdl_descriptor, fd) struct dsc$descriptor *fdl_descriptor; FILE *fd; /* * Dump the descriptor to fd. 
*/ { register char *tp, *end; tp = fdl_descriptor->dsc$a_pointer; end = tp + fdl_descriptor->dsc$w_length; while (tp < end) { if (*tp == '"') { do { putc(*tp++, fd); } while (*tp != '"'); } putc(*tp, fd); if (*tp++ == ';') putc('\n', fd); } } #if TESTING_FDLIO /* * Test program for rms io */ #include char line[133]; char filename[133]; char buffer[2048]; main(argc, argv) int argc; char *argv[]; { FDLSTUFF *old; FDLSTUFF *new; int size, total, nrecords; struct dsc$descriptor fdl_info; /* Result descriptor */ for (;;) { fprintf(stderr, "Old file name: "); fflush(stdout); if (gets(line) == NULL) break; if (line[0] == EOS) continue; if ((old = fdl_open(line, &fdl_info)) == NULL) { fprintf(stderr, "open failed\n"); continue; } fprintf(stderr, "New file name: "); if (gets(line) == NULL) break; if ((new = fdl_create(&fdl_info, line)) == NULL) { fprintf(stderr, "create failed\n"); fdl_free(&fdl_info); continue; } fdl_getname(old, buffer); fprintf(stderr, "Fdl for \"%s\", size %ld\n", buffer, fdl_fsize(old)); fdl_dump(&fdl_info, stderr); total = nrecords = 0; while ((size = fdl_read(buffer, sizeof buffer, old)) > 0) { fdl_write(buffer, size, new); nrecords++; total += size; } fdl_close(old); fdl_close(new); fprintf(stderr, "copied %d records, %d bytes total\n", nrecords, total); fdl_free(&fdl_info); } } #endif #endif -h- makefile.txt Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]MAKEFILE.TXT;6 # Unix makefile for lzcomp, lzdcmp # # The redefinition of strchr() and strrchr() are needed for # Ultrix-32, Unix 4.2 bsd (and maybe some other Unices). # BSDDEFINE = -Dstrchr=index -Dstrrchr=rindex # # On certain systems, such as Unix System III, you may need to define # $(LINTFLAGS) in the make command line to set system-specific lint flags. 
#
# The variable defined above is BSDDEFINE; the original referred to
# $(BSDDEFINES) here and to $(DEFINES) in the lint rule, neither of which
# is defined, so the strchr/strrchr redefinitions were never applied.
#
CFLAGS = -O $(BSDDEFINE)

all : lzcomp lzdcmp

#
# ** compile lzcomp
#
LZCOMP_SRCS = lzcmp1.c lzcmp2.c lzcmp3.c lzio.c
LZCOMP_OBJS = lzcmp1.o lzcmp2.o lzcmp3.o lzio.o
lzcomp: $(LZCOMP_OBJS)
	$(CC) $(CFLAGS) $(LZCOMP_OBJS) -o lzcomp

#
# ** compile lzdcmp
#
LZDCMP_SRCS = lzdcm1.c lzdcm2.c lzdcm3.c lzio.c
LZDCMP_OBJS = lzdcm1.o lzdcm2.o lzdcm3.o lzio.o
lzdcmp: $(LZDCMP_OBJS)
	$(CC) $(CFLAGS) $(LZDCMP_OBJS) -o lzdcmp

#
# ** Lint the code
#
lint: $(LZCOMP_SRCS) $(LZDCMP_SRCS)
	lint $(LINTFLAGS) $(BSDDEFINE) $(LZCOMP_SRCS)
	lint $(LINTFLAGS) $(BSDDEFINE) $(LZDCMP_SRCS)

#
# ** Remove unneeded files
# ** ($(OBJS) was never defined, so the object files were left behind.)
#
clean:
	rm -f $(LZCOMP_OBJS) $(LZDCMP_OBJS) lzcomp lzdcmp

#
# ** Rebuild the archive files
# ** Uses the Decus C archive utility.
#
archive:
	cp Makefile makefile.txt
	archc lzcmp1.c lzcmp2.c lzcmp3.c >lz1.arc
	archc lzdcm1.c lzdcm2.c lzdcm3.c >lz2.arc
	archc lz.h lzio.c lzvio.c makefile.txt >lz3.arc

#
# Object module dependencies
#
lzcmp1.o : lzcmp1.c lz.h
lzcmp2.o : lzcmp2.c lz.h
lzcmp3.o : lzcmp3.c lz.h
lzio.o : lzio.c lz.h
lzdcm1.o : lzdcm1.c lz.h
lzdcm2.o : lzdcm2.c lz.h
lzdcm3.o : lzdcm3.c lz.h
-h- lzcomp.mem Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZCOMP.MEM;2
____ ___________
1 File Compression

**********
* lzcomp *
**********

NAME: lzcomp -- File Compression

SYNOPSIS:

lzcomp [-options] [infile [outfile]]

DESCRIPTION:

lzcomp implements the Lempel-Ziv file compression algorithm. (Files
compressed by lzcomp are uncompressed by lzdcmp.) It operates by finding
common substrings and replaces them with a variable-size code. This is
deterministic, and can be done with a single pass over the file. Thus,
the decompression procedure needs no input table, but can track the way
the table was built.

Options may be given in either case.

-B Input file is "binary", not "human readable text". This is necessary
on Dec operating systems, such as VMS and RSX-11M, that treat these
files differently. (Note that binary support is rudimentary and probably
insufficient as yet.)
(On VMS version 4, this is ignored unless the -x option is specified or the input file is record-oriented.) -M bits Write using the specified number of bits in the code -- necessary for big machines making files for little machines. For example, if compressing a file on VMS which is to be read on a PDP-11, you should select -M 12. -V [n] Verbose if specified. If a value is specified, it will enable debugging code (if compiled in). -X [n] "Export" -- write a file format that can be read by other operating systems. Only the bytes in the file are copied; file attributes are not preserved. If specified, the value determines the level of compatibility. If not specified, or specified with an explicit value of zero, and lzcomp is running on Vax/VMS version 4 under VaxC and the input file is a disk or magtape Page 2 lzcomp File Compression file (block-oriented), a VMS-private output format is used which is incompatible with the Unix compress utility, but which preserves VMS file attributes. -X may take on the following values: 0 Choose VMS private format. See restrictions below. 1 Compatible with Unix compress version 3.0: this is the default if -x is given without a value. 2 As above, but suppress "block compression" 3 Suppress block compression and do not output a compress header block. This is for compatibility with a quite early version of Unix compress (and requires conditional-compilation to use). Note that the -B (binary) option is ignored unless the input file is "record-oriented", such as a terminal or mailbox. The other two arguments are the input and output filenames respectively. Redirection is supported, however, the output must be a disk/tape file. The file format is almost identical to the current Unix implementation of compress (V4.0). Files written by Unix compress should be readable by lzdcmp. Files written by lzcomp in export (-x) format will be readable by Unix compress (except that lzcomp outputs two "clear" codes to mark EOF.
A patch to Unix compress is available.) VMS RESTRICTIONS: VMS Private mode stores the true name and attributes of the input file into the compressed file and lzdcmp restores the attributes (and filename if requested). The following restrictions apply -- they may be lifted in the future as they are primarily due to the author's lack of understanding of the intricacies of VMS I/O: All files must be stored on disk. The lzcomp output file must be specified directly. Also, for all usage on VMS, the compressed file must be written to, and read from, disk. LZW COMPRESSION ALGORITHM: This section is abstracted from Terry Welch's article referenced below. The algorithm builds a string translation table that maps substrings in the input into Page 3 lzcomp File Compression fixed-length codes. The compress algorithm may be described as follows: 1. Initialize table to contain single-character strings. 2. Read the first character. Set <w> (the prefix string) to that character. 3. (step): Read next input character, K. 4. If at end of file, output code(<w>); exit. 5. If <w>K is in the string table: Set <w> to <w>K; goto step 3. 6. Else <w>K is not in the string table. Output code(<w>); Put <w>K into the string table; Set <w> to K; Goto step 3. "At each execution of the basic step an acceptable input string <w> has been parsed off. The next character K is read and the extended string <w>K is tested to see if it exists in the string table. If it is there, then the extended string becomes the parsed string <w> and the step is repeated. If <w>K is not in the string table, then it is entered, the code for the successfully parsed string <w> is put out as compressed data, the character K becomes the beginning of the next string, and the step is repeated." The decompression algorithm translates each received code into a prefix string and extension [suffix] character.
The extension character is stored (in a push-down stack), and the prefix translated again, until the prefix is a single character, which completes decompression of this code. The entire code is then output by popping the stack. "An update to the string table is made for each code received (except the first one). When a code has been translated, its final character is used as the extension character, combined with the prior string, to add a new string to the string table. This new string is assigned a unique code value, which is the same code that the compressor assigned to that string. In this way, the decompressor incrementally reconstructs the same string table that the compressor used.... Unfortunately ... [the algorithm] does not work for an abnormal case. The abnormal case occurs whenever an input character string contains the sequence K<w>K<w>K, where K<w> already appears in the compressor string table." The decompression algorithm, augmented to handle the abnormal case, is as follows: 1. Read first input code; Page 4 lzcomp File Compression Store in CODE and OLDcode; With CODE = code(K), output(K); FINchar = K; 2. Read next code to CODE; INcode = CODE; If at end of file, exit; 3. If CODE not in string table (special case) then Output(FINchar); CODE = OLDcode; INcode = code(OLDcode, FINchar); 4. If CODE == code(<w>K) then Push K onto the stack; CODE = code(<w>); Goto 4. 5. If CODE == code(K) then Output K; FINchar = K; 6. While stack not empty Output top of stack; Pop stack; 7. Put OLDcode,K into the string table. OLDcode = INcode; Goto 2. The algorithm as implemented here introduces two additional complications. The actual codes are transmitted using a variable-length encoding. The lowest-level routines increase the number of bits in the code when the largest possible code is transmitted. Periodically, the algorithm checks that compression is still increasing. If the ratio of input bytes to output bytes decreases, the entire process is reset.
This can happen if the characteristics of the input file change. VMS PRIVATE FILE STRUCTURE: In VMS Private mode, the compressed data file contains a variable-length (but compressed) file header with the file "attributes" needed by the operating system to construct the file. This allows the decompression program to recreate the file in its original format, which is essential if ISAM databases are compressed. The overall file format is as follows: LZ_SOH "start of header" signal (this value cannot appear in user data). A variable-length data record (maximum 256 Page 5 lzcomp File Compression bytes) containing the header name, followed by whitespace, followed by header-specific information. In this case, the name record will contain the string "vms$attributes" followed by the number of bytes in the attribute data block. (I assume that the name record will consist of a facility name, such as "vms", followed by a dollar sign, followed by a facility-unique word.) LZ_EOR Signals "end of record". This is followed by a VMS file attributes record (generated by a VMS system library routine). LZ_ETX Signals "end of segment". LZ_STX Signals "start of text" (i.e., start of data file). This is followed by the user data file. LZ_ETX Signals "end of segment" LZ_ETX Two in a row signals "end of file". Note that this format can easily be extended to include trailer records (with file counts and checksums) and/or multiple data files in one compressed file. Note also that the LZ_CLEAR code may appear in headers or data files to cause the decompression program to "readapt" to the characteristics of the input data. LZ_STX and LZ_SOH reset the compression algorithm. LZ_EOR does not. AUTHORS: The algorithm is from "A Technique for High Performance Data Compression." Terry A. Welch. IEEE Computer Vol 17, No. 6 (June 1984), pp 8-19. This revision is by Martin Minow. Unix Compress authors are as follows: Spencer W.
Thomas (decvax!harpo!utah-cs!utah-gr!thomas) Jim McKie (decvax!mcvax!jim) Steve Davies (decvax!vax135!petsd!peora!srd) Ken Turkowski (decvax!decwrl!turtlevax!ken) James A. Woods (decvax!ihnp4!ames!jaw) Joe Orost (decvax!vax135!petsd!joe) -h- lzdcmp.mem Wed Jul 24 11:49:39 1985 USER$A:[MINOW.LZ]LZDCMP.MEM;2 ____ _____________ 1 File Decompression ********** * lzdcmp * ********** NAME: lzdcmp -- File Decompression SYNOPSIS: lzdcmp [-options] [infile [outfile]] DESCRIPTION: lzdcmp decompresses files compressed by lzcomp. The documentation for lzcomp describes the process in greater detail. Options may be given in either case. -B Output file is "binary", not text. (Ignored in VMS private mode.) -X 3 To read files compressed by an old Unix version that doesn't generate header records. -V val Verbose (print status messages and debugging information). The value selects the amount of verbosity. AUTHOR: This version by Martin Minow. See lzcomp for more details.