* * * * * 99 ways to program a hex, Part 22: C89, const correctness, assertive, system calls, full buffering So yesterday [1] I presented a non-portable version that was quite a bit faster than the portable version, but I'm not quite done yet. That version just buffered a line at a time—today's version buffers nearly 8k (Kilobyte) worth of data (it's not exact, but it's close enough) between calls to write(). > /************************************************************************* > * > * Copyright 2012 by Sean Conner. All Rights Reserved. > * > * This program is free software; you can redistribute it and/or > * modify it under the terms of the GNU General Public License > * as published by the Free Software Foundation; either version 2 > * of the License, or (at your option) any later version. > * > * This program is distributed in the hope that it will be useful, > * but WITHOUT ANY WARRANTY; without even the implied warranty of > * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > * GNU General Public License for more details. > * > * You should have received a copy of the GNU General Public License > * along with this program; if not, write to the Free Software > * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. > * > * Comments, questions and criticisms can be sent to: sean@conman.org > * > *************************************************************************/ > > /* Style: C89, const correctness, assertive, system calls, full buffering */ > > #include > #include > #include > #include > > #include > #include > #include > #include > > #define LINESIZE 16 > > /********************************************************************/ > > extern const char *sys_errlist[]; > extern int sys_nerr; > > static void do_dump (const int,const int); > static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long); > static void hexout (char *const,unsigned long,size_t,const int); > static void myperror (const char *const); > static size_t myread (const int,char *,size_t); > static void mywrite (const int,const char *const,const size_t); > > /********************************************************************/ > > int main(const int argc,const char *const argv[]) > { > if (argc == 1) > do_dump(STDIN_FILENO,STDOUT_FILENO); > else > { > int i; > > for (i = 1 ; i < argc ; i++) > { > int fhin; > > fhin = open(argv[i],O_RDONLY); > if (fhin == -1) > { > myperror(argv[i]); > continue; > } > > mywrite(STDOUT_FILENO,"-----",5); > mywrite(STDOUT_FILENO,argv[i],strlen(argv[i])); > mywrite(STDOUT_FILENO,"-----\n",6); > > do_dump(fhin,STDOUT_FILENO); > if (close(fhin) < 0) > myperror(argv[i]); > } > } > > return EXIT_SUCCESS; > } > > /************************************************************************/ > > static void do_dump(const int fhin,const int fhout) > { > unsigned char buffer[4096]; > char outbuffer[75 * 109]; > char *pout; > unsigned long off; > size_t bytes; > size_t count; > > assert(fhin >= 0); > assert(fhout >= 0); > > memset(outbuffer,' ',sizeof(outbuffer)); > off = 0; > count = 0; > pout = outbuffer; > > while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0) > { > unsigned char *p = buffer; > > for (p = buffer ; bytes > 0 ; ) > { > size_t amount; > > amount = dump_line(&pout,p,bytes,off); > p += amount; > bytes -= amount; > off += amount; > count++; > > if (count == 109) > { > mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); > memset(outbuffer,' ',sizeof(outbuffer)); > count = 0; > pout = outbuffer; > } > } > } > > if ((size_t)(pout - outbuffer) > 0) > mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); > } > > /********************************************************************/ > > static size_t dump_line( > char **const pline, > unsigned char *p, > size_t bytes, > const unsigned long off > ) > { > char *line; > char *dh; > char *da; > size_t count; > > assert(pline != NULL); > assert(*pline != NULL); > assert(p != NULL); > assert(bytes > 0); > > line = *pline; > > hexout(line,off,8,':'); > if (bytes > LINESIZE) > bytes = LINESIZE; > > p += bytes; > dh = &line[10 + bytes * 3]; > da = &line[58 + bytes]; > > for (count = 0 ; count < bytes ; count++) > { > p --; > da --; > dh -= 3; > > if ((*p >= ' ') && (*p <= '~')) > *da = *p; > else > *da = '.'; > > hexout(dh,(unsigned long)*p,2,' '); > } > > line[58 + count] = '\n'; > *pline = &line[59 + count]; > return count; > } > > /**********************************************************************/ > > static void hexout(char *const dest,unsigned long value,size_t size,const int padding) > { > assert(dest != NULL); > assert(size > 0); > assert((padding >= ' ') && (padding <= '~')); > > dest[size] = padding; > while(size--) > { > dest[size] = (char)((value & 0x0F) + '0'); > if (dest[size] > '9') dest[size] += 7; > value >>= 4; > } > } > > /************************************************************************/ > > static void myperror(const char *const s) > { > int err = errno; > > assert(s != NULL); > > mywrite(STDERR_FILENO,s,strlen(s)); > mywrite(STDERR_FILENO,": ",2); > > if (err > sys_nerr) > mywrite(STDERR_FILENO,"(unknown)",9); > else > mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err])); > mywrite(STDERR_FILENO,"\n",1); > } > > /************************************************************************/ > > static size_t myread(const int fh,char *buf,size_t size) > { > size_t amount = 0; > > assert(fh >= 0); > assert(buf != NULL); > assert(size > 0); > > while(size > 0) > { > ssize_t bytes; > > bytes = read(fh,buf,size); > if (bytes < 0) > { > myperror("read()"); > exit(EXIT_FAILURE); > } > if (bytes == 0) > break; > > amount += bytes; > size -= bytes; > buf += bytes; > } > > return amount; > } > > /*********************************************************************/ > > static void mywrite(const int fh,const char *const msg,const size_t size) > { > assert(fh >= 0); > assert(msg != NULL); > assert(size > 0); > > if (write(fh,msg,size) < (ssize_t)size) > { > if (fh != STDERR_FILENO) > myperror("output"); > > exit(EXIT_FAILURE); > } > } > > /***********************************************************************/ > And that makes all the difference. The portable vesrsion [2]: > [spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null > > real 0m4.985s > user 0m4.969s > sys 0m0.015s > Our first stab at a non-portable, but possibly faster version [3]: > [spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null > > real 0m2.936s > user 0m1.511s > sys 0m1.425s > The “it's quite a bit faster” version [4]: > [spc]lucy:~/projects/99/src>time ./21 ~/bin/firefox/libxul.so >/dev/null > > real 0m0.957s > user 0m0.645s > sys 0m0.313s > And finally, the punchline—today's version: > [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null > > real 0m0.460s > user 0m0.448s > sys 0m0.012s > And yes, that's the real output—1/10 the time of the portable version with a similar amount of time in the kernel. Frankly, I was a bit surprised at these results—not that the non-portable version was faster (that's almost a given) but the magnitude of the results. I didn't think the standard C library had that much overhead. I was expecting easily a percentage increase in speed, but even twice would have been unexpected, but ten times faster? Wow. Increasing the size of the buffer past what I have probably won't help all that much, and in fact, when I doubled the buffer size: > [spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null > > real 0m0.592s > user 0m0.582s > sys 0m0.010s > The timing difference could be due to cache effects, maybe? So I think we've maxed out the speed at which this program will run. As a test, I profiled the code to see if there was anything I migh have missed: > Each sample counts as 0.01 seconds. > % cumulative self self total > time seconds seconds calls ms/call ms/call name > 100.21 0.41 0.41 1 410.86 410.86 do_dump > 0.00 0.41 0.00 9197 0.00 0.00 mywrite > I checked the code GCC [5] produced (all code was compiled with -O3, a very high level of optimization) and well, I'm not sure I could have done much better, and probably would have done worse—GCC inlined everything into do_dump() (with the exception of main() and mywrite()), something I would not have done in assembly (and have any hope of code reuse for another project). So I think we're done with making this code fast. That's not to say I won't do an assembly version of this program, but it probably won't be for the x86 line. * Part 21: C89, const correctness, assertive, system calls, per line buffering [6] * Part 23: C89, const correctness, assertive, system calls, full buffering, lookup table [7] [1] gopher://gopher.conman.org/0Phlog:2012/01/29.1 [2] gopher://gopher.conman.org/0Phlog:2012/01/20.2 [3] gopher://gopher.conman.org/0Phlog:2012/01/28.1 [4] gopher://gopher.conman.org/0Phlog:2012/01/29.1 [5] http://gcc.gnu.org/ [6] gopher://gopher.conman.org/0Phlog:2012/01/29.1 [7] gopher://gopher.conman.org/0Phlog:2012/01/31.2 Email Sean Conner at sean@conman.org .