/*-----------------------------------------------------------------------------

'SQ' version 1.5
	Richard Greenlaw, 251 Colony Ct, Gahanna, Ohio 43230, U.S.A.

Version 1.5a amendments
	John Hastwell-Batten, 38 Silvia St, Hornsby, NSW 2077, Australia.

This program compresses a file without losing information.  The companion
program USQ.COM is required to unsqueeze the file before it can be used.

Typical compression rates are:
	.COM	 6%	(Don't bother)
	.ASM	33%	(using full ASCII set)
	.DIC	46%	(using only uppercase and a few others)

Squeezing a really big file takes a few minutes.

Usage:	SQ item ...

where ... represents more (optional) items and "item" is either:

	drive:		to change the output drive
	file		input file
	drive:file	input file
	-		toggle debugging display mode
	+		toggle "permission" flag

Ambiguous file names are supported by version 1.5a of this utility through
Leor Zolman's WILDEXP routine which also allows for EXCLUDING files from the
parameter list.  See the documentation in WILDEXP.C for details.

The squeezed file name is formed by changing the second letter of the file
type to Q.  If there is no file type, the squeezed file type is QQQ.  If the
name exists it is overwritten!

Examples:
	A>SQ GRUMP		makes GRUMP.QQQ on A:
	A>SQ D:CRAP.XYZ		makes CRAP.XQZ on A:
	A>SQ B: D:CRAP.COM	makes CRAP.CQM on B:
	B>SQ X.A C: Y.B		makes X.AQ on B: and Y.BQ on C:
	A>sq *.*		squeezes all files on A:
	A>sq b: *.*		puts squeezed versions of all files from A:
				onto B:
	A>sq *.* !-*.* !*.com	squeezes all files on A: except .COM files
				and those having names starting with a hyphen
	A>sq b: *.* !*.com	squeeze all except .COM files onto B:

The transformations compress strings of identical bytes and then encode each
resulting byte value and EOF as bit strings having lengths in inverse
proportion to their frequency of occurrence in the intermediate input stream.
The latter uses the Huffman algorithm.
Decoding information is included in the squeezed file, so squeezing short
files or files with uniformly distributed byte values will actually increase
size.

SQ version 1.5a checks the output file size.  If it is not WORTHWHILE percent
smaller than the original then it will automatically discard the output file,
and, if the output file was created on a different disk from the original
then the input file will be copied in place of the discarded output.  If the
+ option is active and a poorly-compressed file is generated then SQ 1.5a
will ask permission before discarding the output file (and maybe copying the
input file).

In general, version 1.5a should make it easy to squeeze whole disks with a
series of commands such as:

	findbad b:			;Validate the output disk
	pip b:=a:-*.*			;Copy the volume id files
	pip b:=a:crclist.*		;Copy the CRC file if it exists
	sq b: *.* !-*.* !crclist.*	;Squeeze everything else

-------------------------------------------------------------------------------

CHANGE HISTORY:

1.3	Close files properly in case of error exit.

1.4	Break up long introductory lines.
	Send introduction only to console.
	Send errors only to console.

1.5	Fix BUG that caused a rare few squeezed files to be incorrect and
	fail the USQ crc check.  The problem was that some 17 bit codes were
	generated but are not supported by other code.  THIS IS A MAJOR
	CHANGE affecting TR2.C and SQ.H and requires recompilation of all
	files which are part of SQ.  Two basic changes were made:
	(1) tree depth is now used as a tie-breaker when weights are equal.
	This makes the tree shallower.  Although that may always be
	sufficient, (2) an error trap was added to cause rescaling of the
	counts if any code more than 16 bits long is generated.
	Add debugging displays option '-'.

1.5a	Leor Zolman's WILDEXP command-line preprocessor routine is invoked
	to support ambiguous file names and exclusions.
	I/O redirection was removed.
Output file size checking was implemented to trap the creation of poorly-compressed files. Instead of ignoring files which are already squeezed, SQ will copy them if the destination disk is different from the source disk. The only changes to modules other than SQ.C were to remove the "#include " statements and to replace "fprintf(STDERR.." with "printf(.." 1.6 (Obsolete) Added simple wildcard expansion. -----------------------------------------------------------------------------*/ #define VERSION "1.5a" #define DATE "19th June 1983" #include #include "sqcom.h" #include "sq.h" #define WORTHWHILE 8 /* any less compression than 8% is hardly worth it! */ #define THRESH 9 /* no real point in squeezing any file shorter than 9 sectors */ char default_drive, newdrive, permission, l_worthwhile[4], l_num[4], l_den[4], one_hundred[4], l_pct[8]; int orig_sects, sq_sects, unsq_sects; main(argc, argv) int argc; char *argv[]; { int i,c; if (wildexp(&argc,&argv)) exit(puts("\07**** Too many files to process\n")); itol(l_worthwhile,WORTHWHILE); itol(one_hundred,100); permission = TRUE; debug = FALSE; printf("\n\n\tSQUEEZE version %s (%s)\n",VERSION,DATE); printf("\tA file compression utility by Richard Greenlaw\n"); /* printf("\t251 Colony Ct, Gahanna, Ohio 43230\n"); */ /* Initialize output drive to default drive */ outdrv[0] = '\0'; /* But prepare for a specific drive */ outdrv[1] = ':'; outdrv[2] = '\0'; /* string terminator */ default_drive = bdos(25,0)+'A'; /* Process the parameters in order */ for(i = 1; i < argc; ++i) obey(argv[i]); if (argc > 1) { printf("\n\n%d sectors were compressed to %d representing a ", orig_sects, sq_sects); printf("%s%c shrinkage.\n", percent(orig_sects-sq_sects,orig_sects),'%'); printf("%d sectors were not squeezed. 
Nett shrinkage was %s%c", unsq_sects, percent(orig_sects-sq_sects,orig_sects+unsq_sects),'%'); } } percent(numerator,denominator) int numerator,denominator; { /* Returns a pointer to an ASCII string representation of the quantity numerator*100/denominator */ return(ltoa(l_pct,ldiv(l_num,lmul(l_num,itol(l_num,numerator), one_hundred),itol(l_den,denominator)))); } obey(p) char *p; { char *q, just_copy; char outfile[16]; /* output file spec. */ if((*p == '-') && (*(p+1)=='\0')) { /* toggle debug option */ debug = !debug; return; } if((*p == '+') && (*(p+1)=='\0')) { /* toggle "permission" flag */ permission = !permission; return; } if(*(p + 1) == ':') { /* Got a drive */ if(isalpha(*p)) { if(*(p+2) == '\0') { /* Change output drive */ printf("\nOutput drive =%s",p); outdrv[0] = *p; newdrive=(*p != default_drive); return; } } else { printf( "\nERROR - Ignoring %s", p); return; } } /* First build output file name */ outfile[0] = '\0'; /* empty */ strcat(outfile, outdrv); /* drive */ strcat(outfile, (*(p + 1) == ':') ? 
p + 2 : p); /* input name */ /* Find and change output file type */ just_copy = FALSE; for(q = outfile; *q != '\0'; ++q) if(*q == '.') if(*(q + 1) == '\0') *q = '\0'; /* kill trailing dot */ else switch(*(q+2)) { case 'q': case 'Q': just_copy = TRUE; /* just copy to output if necessary */ case '\0': *(q+3) = '\0'; /* fall thru */ default: *(q + 2) = 'Q'; goto named; } /* No file type */ strcat(outfile, ".QQQ"); named: squeeze(p, outfile, just_copy); } squeeze(infile, outfile, just_copy) char *infile, *outfile, just_copy; { int orgsize, newsize; int i, c, gain; char junkit, copy_original; struct _buf inbuff, outbuff; /* file buffers */ printf("\n\n%s -> %s: ", infile, outfile); if(fopen(infile, &inbuff) == ERROR) { printf( "Can't open %s for input pass 1\n", infile); return; } if (((orgsize=cfsize(inbuff._fd)) < THRESH) || just_copy) { unsq_sects += orgsize; printf("%d sectors - ",orgsize); if (just_copy) printf("already squeezed?\n"); else printf("too short to be worth squeezing\n"); copy_original=newdrive; fabort(inbuff._fd); /* File gets opened again later */ goto copy; } if(fcreat(outfile, &outbuff) == ERROR) { printf( "Can't create %s\n", outfile); fclose(&inbuff); return; } junkit = copy_original = FALSE; /* First pass - get properties of file */ crc = 0; /* initialize checksum */ printf("analyzing, "); init_ncr(); init_huff(&inbuff); fclose(&inbuff); /* Write output file header with decoding info */ wrt_head(&outbuff, infile); /* Second pass - encode the file */ printf("squeezing, "); if(fopen(infile, &inbuff) == ERROR) { printf( "Can't open %s for input pass 2\n", infile); junkit = TRUE; goto closeout; } init_ncr(); /* For second pass */ /* Translate the input file into the output file */ while((c = gethuff(&inbuff)) != EOF) if(putc(c, &outbuff) == ERROR) { printf( "ERROR - write failure in %s\n", outfile); junkit = TRUE; goto closeall; } printf("done."); orgsize=cfsize(inbuff._fd); fflush(&outbuff); newsize=cfsize(outbuff._fd); printf("\nInput %d sectors, 
output %d sectors, i.e. ", orgsize,newsize); if (orgsize-newsize) { printf("a%ssion of %d sectors (%s%c)", ((orgsize=orgsize) || (lcomp(l_num,l_worthwhile)-1)) if (junkit=permission) { printf("\nDiscarding output file\n"); copy_original = newdrive; } else { printf("\nShould I discard the output file%s?", (newdrive ? " and copy the original" : "")); if (junkit=(tolower(c=getchar()) != 'n')) { copy_original = newdrive; if (c != '\n') putc('\n'); } } if (junkit) unsq_sects += orgsize; else { orig_sects += orgsize; sq_sects += newsize; } closeall: fclose(&inbuff); closeout: fflush(&outbuff); fclose(&outbuff); if (junkit) unlink(outfile); copy: if (copy_original) { fopen(infile,&inbuff); /* don't have to check result */ outfile[0] = '\0'; /* empty */ strcat(outfile, outdrv); /* drive */ strcat(outfile,(*(infile+1)==':') ? infile+2 : infile); if (fcreat(outfile,&outbuff)==ERROR) printf("Cannot open output %s%s for simple copy", outdrv,outfile); else { printf("Copying %s unchanged to %s disk",infile,outdrv); while ((c=getc(&inbuff)) != EOF) if (putc(c,&outbuff)==ERROR) { printf("\nError during copy - disk full?"); break; } } copy_original = junkit = FALSE; goto closeall; /* Go back and tidy up the files */ } } .