se.ps - sam - An updated version of the sam text editor.
 (HTM) git clone git://vernunftzentrum.de/sam.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
       se.ps (41645B)
       ---
            1 %!PS
            2 %%Version: 3.3.2
            3 %%DocumentFonts: (atend)
            4 %%Pages: (atend)
            5 %%EndComments
            6 %
            7 % Version 3.3.2 prologue for troff files.
            8 %
            9 
           10 /#copies 1 store                % copies produced per showpage (standard #copies key)
           11 /aspectratio 1 def                % extra y factor: setup scales y by magnification*aspectratio
           12 /formsperpage 1 def                % not read anywhere in the visible prologue -- TODO confirm who uses it
           13 /landscape false def                % true: setup adds 90 to orientation, pagedimensions swaps width/height
           14 /linewidth .3 def                % base line width; set in setup and rescaled per point size by f
           15 /magnification 1 def                % uniform scale factor applied in setup
           16 /margin 0 def                % setup translates by (margin/2, -margin/2)
           17 /orientation 0 def                % rotation in degrees; multiplied by rotation in setup
           18 /resolution 720 def                % input units per inch; setup derives scaling = 72/resolution
           19 /rotation 1 def                % factor multiplied into orientation before rotate
           20 /xoffset 0 def                % horizontal page offset in inches (setup applies inch)
           21 /yoffset 0 def                % vertical page offset in inches; negated in setup
           22 
           23 /roundpage true def                % let pagedimensions call roundpagebbox when it is defined
           24 /useclippath true def                % let pagedimensions take pagebbox from clippath on its first run
           25 /pagebbox [0 0 612 792] def                % fallback box [llx lly urx ury]; 612x792 = 8.5 inch x 11 inch at 72 per inch
           26 
           27 /R  /Times-Roman def                % short names that push a font name for f, e.g. "10 R f"
           28 /I  /Times-Italic def
           29 /B  /Times-Bold def
           30 /BI /Times-BoldItalic def
           31 /H  /Helvetica def
           32 /HI /Helvetica-Oblique def
           33 /HB /Helvetica-Bold def
           34 /HX /Helvetica-BoldOblique def
           35 /CW /Courier def
           36 /CO /Courier def                % same target font as CW
           37 /CI /Courier-Oblique def
           38 /CB /Courier-Bold def
           39 /CX /Courier-BoldOblique def
           40 /PA /Palatino-Roman def
           41 /PI /Palatino-Italic def
           42 /PB /Palatino-Bold def
           43 /PX /Palatino-BoldItalic def
           44 /Hr /Helvetica-Narrow def
           45 /Hi /Helvetica-Narrow-Oblique def
           46 /Hb /Helvetica-Narrow-Bold def
           47 /Hx /Helvetica-Narrow-BoldOblique def
           48 /KR /Bookman-Light def
           49 /KI /Bookman-LightItalic def
           50 /KB /Bookman-Demi def
           51 /KX /Bookman-DemiItalic def
           52 /AR /AvantGarde-Book def
           53 /AI /AvantGarde-BookOblique def
           54 /AB /AvantGarde-Demi def
           55 /AX /AvantGarde-DemiOblique def
           56 /NR /NewCenturySchlbk-Roman def
           57 /NI /NewCenturySchlbk-Italic def
           58 /NB /NewCenturySchlbk-Bold def
           59 /NX /NewCenturySchlbk-BoldItalic def
           60 /ZD /ZapfDingbats def
           61 /ZI /ZapfChancery-MediumItalic def
           62 /S  /S def                % font /S is created by addmetrics (Symbol plus Sdefs metrics)
           63 /S1 /S1 def                % font /S1 is created by addmetrics (Times-Roman plus S1defs metrics)
           64 /GR /Symbol def
           65 
           66 /inch { 72 exch mul } bind def                % inches -> default user-space units (72 per inch)
           67 /min { 2 copy gt {exch pop} {pop} ifelse } bind def                % a b -> b when a > b, otherwise a
           68 
           69 /show {show} bind def                % so later references don't bind
           70 /widthshow {widthshow} bind def                % same wrapper for widthshow: the name now maps to a procedure
           71 /stringwidth {stringwidth} bind def                % same wrapper for stringwidth
           72 
           73 /setup {                % device/page setup; invoked at the end of the Setup section
           74         counttomark 2 idiv {def} repeat pop                % def each key/value pair above the mark, then pop the mark
           75 
           76         landscape {/orientation 90 orientation add def} if                % landscape adds 90 degrees
           77         /scaling 72 resolution div def                % 1/72-inch units per input unit
           78         linewidth setlinewidth
           79         1 setlinecap                % line cap style 1 (round caps)
           80 
           81         pagedimensions                % defines width, height, xcenter, ycenter
           82         xcenter ycenter translate                % origin to the centre of the page box
           83         orientation rotation mul rotate
           84         width 2 div neg height 2 div translate                % origin to (-width/2, +height/2) in the rotated frame
           85         xoffset inch yoffset inch neg translate                % offsets are given in inches
           86         margin 2 div dup neg translate                % shift by (margin/2, -margin/2)
           87         magnification dup aspectratio mul scale                % x by magnification, y by magnification*aspectratio
           88         scaling scaling scale                % coordinates are in input (resolution) units from here on
           89 
           90         addmetrics                % define the S and S1 fonts
           91         0 0 moveto
           92 } def
           93 
           94 /pagedimensions {                % defines width, height, xcenter, ycenter from pagebbox
           95         useclippath userdict /gotpagebbox known not and {                % only when useclippath is true and no bbox was recorded yet
           96                 /pagebbox [clippath pathbbox newpath] def                % replace pagebbox with the clip path's bounding box
           97                 roundpage currentdict /roundpagebbox known and {roundpagebbox} if                % optionally snap to a known paper size
           98         } if
           99         pagebbox aload pop                % -> llx lly urx ury
          100         4 -1 roll exch 4 1 roll 4 copy                % -> ury lly urx llx, then duplicate all four
          101         landscape {4 2 roll} if                % landscape: put the y pair on top so width/height swap
          102         sub /width exch def                % urx-llx (ury-lly when landscape)
          103         sub /height exch def                % ury-lly (urx-llx when landscape)
          104         add 2 div /xcenter exch def                % (urx+llx)/2
          105         add 2 div /ycenter exch def                % (ury+lly)/2
          106         userdict /gotpagebbox true put                % mark the bbox as established for later calls
          107 } def
          108 
          109 /addmetrics {                % build the two metrics-adjusted fonts via cf
          110         /Symbol /S null Sdefs cf                % /S: Symbol with Sdefs metrics; null leaves the encoding untouched
          111         /Times-Roman /S1 StandardEncoding dup length array copy S1defs cf                % /S1: Times-Roman, copied StandardEncoding, S1defs metrics
          112 } def
          113 
          114 /pagesetup {                % n pagesetup: record n in page, then run its per-page hook if registered
          115         /page exch def
          116         currentdict /pagedict known {                % hooks need a pagedict in the current dictionary ...
          117                 currentdict page known {pagedict page load get cvx exec} if                % ... and an entry keyed by this page number
          118         } if
          119 } def
          120 
          121 /decodingdefs [                % candidate bodies for t; setdecoding picks one by index
          122         {counttomark 2 idiv {y moveto show} repeat}                % 0: (string) x pairs above the mark, shown at the stored y
          123         {neg /y exch def counttomark 2 idiv {y moveto show} repeat}                % 1: like 0, but the top operand is y (negated and stored first)
          124         {neg moveto {2 index stringwidth pop sub exch div 0 32 4 -1 roll widthshow} repeat}                % 2: ... (string) nspaces width  count x y; pads each space by (width - natural width)/nspaces
          125         {neg moveto {spacewidth sub 0.0 32 4 -1 roll widthshow} repeat}                % 3: ... (string) spacing  count x y; pads each space by spacing - spacewidth
          126         {counttomark 2 idiv {y moveto show} repeat}                % 4: same body as 0
          127         {neg setfunnytext}                % 5: hands off to setfunnytext, which is not defined in the visible source
          128 ] def
          129 
          130 /setdecoding {decodingdefs exch get bind /t exch def} bind def                % n setdecoding: t becomes the bound n-th entry of decodingdefs
          131 
          132 /w {neg moveto show} bind def                % (string) x y w: show string at (x, -y)
          133 /m {neg dup /y exch def moveto} bind def                % x y m: store -y in y and move to (x, -y)
          134 /done {/lastpage where {pop lastpage} if} def                % run lastpage if a dictionary on the dict stack defines it
          135 
          136 /f {                % ptsize fontname f: select a font and update font-dependent state
          137         dup /font exch def findfont exch                % remember the name in font; -> fontdict ptsize
          138         dup /ptsize exch def scaling div dup /size exch def scalefont setfont                % size = ptsize/scaling, i.e. in input units
          139         linewidth ptsize mul scaling 10 mul div setlinewidth                % line width = linewidth*ptsize/(10*scaling)
          140         /spacewidth ( ) stringwidth pop def                % x-width of a space in the new font (used by decoding 3)
          141 } bind def
          142 
          143 /changefont {                % slant height changefont: re-transform the current font
          144         /fontheight exch def
          145         /fontslant exch def
          146         [                % build the makefont matrix first, fetch the font afterwards
          147                 1 0
          148                 fontheight ptsize div dup                % vertical stretch, needed twice
          149                 fontslant sin mul fontslant cos div exch                % shear = stretch*sin/cos, placed before the stretch
          150                 0 0
          151         ] currentfont exch makefont setfont
          152 } bind def
          153 
          154 /sf {f} bind def                % same operands and effect as f; f is looked up by name when sf runs
          155 
          156 /cf {                % basefont newfont encoding chtab cf: define newfont as basefont plus a Metrics dict
          157         dup length 2 idiv                % chtab holds name/array pairs
          158         /entries exch def
          159         /chtab exch def
          160         /newencoding exch def
          161         /newfont exch def
          162 
          163         findfont dup length 1 add dict                % new dict with room for one extra entry (Metrics)
          164         /newdict exch def
          165         {1 index /FID ne {newdict 3 1 roll put}{pop pop} ifelse} forall                % copy every entry of the base font except FID
          166 
          167         newencoding type /arraytype eq {newdict /Encoding newencoding put} if                % replace Encoding only when an array was supplied
          168 
          169         newdict /Metrics entries dict put
          170         newdict /Metrics get
          171         begin                % defs below land in the Metrics dictionary
          172                 chtab aload pop
          173                 1 1 entries {pop def} for                % one def per name/array pair
          174                 newfont newdict definefont pop
          175         end
          176 } bind def
          177 
          178 %
          179 % A few arrays used to adjust reference points and character widths in some
          180 % of the printer resident fonts. If square roots are too high try changing
          181 % the lines describing /radical and /radicalex to,
          182 %
          183 %        /radical        [0 -75 550 0]
          184 %        /radicalex        [-50 -75 500 0]
          185 %
          186 % Move braceleftbt a bit - default PostScript character is off a bit.
          187 %
          188 
          189 /Sdefs [                % Metrics entries for font /S: 2 numbers = sidebearing, width; 4 = sidebearing x y, width x y
          190         /bracketlefttp                [201 500]
          191         /bracketleftbt                [201 500]
          192         /bracketrighttp                [-81 380]
          193         /bracketrightbt                [-83 380]
          194         /braceleftbt                [203 490]
          195         /bracketrightex                [220 -125 500 0]
          196         /radical                [0 0 550 0]
          197         /radicalex                [-50 0 500 0]
          198         /parenleftex                [-20 -170 0 0]
          199         /integral                [100 -50 500 0]
          200         /infinity                [10 -75 730 0]
          201 ] def
          202 
          203 /S1defs [                % Metrics entries for font /S1 (built from Times-Roman by addmetrics)
          204         /underscore                [0 80 500 0]
          205         /endash                        [7 90 650 0]
          206 ] def
          207 %
          208 % Tries to round clipping path dimensions, as stored in array pagebbox, so they
          209 % match one of the known sizes in the papersizes array. Lower left coordinates
          210 % are always set to 0.
          211 %
          212 
          213 /roundpagebbox {                % modifies pagebbox in place
          214     7 dict begin                % scratch dict for papersizes, mappapersize, val, slop, diff, j, i
          215         /papersizes [8.5 inch 11 inch 14 inch 17 inch] def                % candidate dimensions: 612 792 1008 1224
          216 
          217         /mappapersize {                % val -> nearest papersizes entry when closer than slop, else val unchanged
          218                 /val exch def
          219                 /slop .5 inch def
          220                 /diff slop def                % smallest distance seen so far, starting at slop
          221                 /j 0 def
          222                 0 1 papersizes length 1 sub {
          223                         /i exch def
          224                         papersizes i get val sub abs
          225                         dup diff le {/diff exch def /j i def} {pop} ifelse                % keep the closest candidate so far
          226                 } for
          227                 diff slop lt {papersizes j get} {val} ifelse                % strictly less than slop is required to snap
          228         } def
          229 
          230         pagebbox 0 0 put                % lower-left x forced to 0
          231         pagebbox 1 0 put                % lower-left y forced to 0
          232         pagebbox dup 2 get mappapersize 2 exch put                % snap upper-right x
          233         pagebbox dup 3 get mappapersize 3 exch put                % snap upper-right y
          234     end
          235 } bind def
          236 
          237 %%EndProlog
          238 %%BeginSetup
          239 mark                % setup defs any key/value pairs left above this mark, then pops it
          240 /linewidth 0.5 def                % overrides the prologue default of .3
          241 /#copies 1 store
          242 /landscape false def
          243 /resolution 720 def
          244 %
          245 % Encoding vector and redefinition of findfont for the ISO Latin1 standard.
          246 % The 18 characters missing from ROM based fonts on older printers are noted
          247 % below.
          248 %
          249 
          250 /ISOLatin1Encoding [                % 256 glyph names indexed by character code
          251         /.notdef                % code 0
          252         /.notdef
          253         /.notdef
          254         /.notdef
          255         /.notdef
          256         /.notdef
          257         /.notdef
          258         /.notdef
          259         /.notdef
          260         /.notdef
          261         /.notdef
          262         /.notdef
          263         /.notdef
          264         /.notdef
          265         /.notdef
          266         /.notdef
          267         /.notdef
          268         /.notdef
          269         /.notdef
          270         /.notdef
          271         /.notdef
          272         /.notdef
          273         /.notdef
          274         /.notdef
          275         /.notdef
          276         /.notdef
          277         /.notdef
          278         /.notdef
          279         /.notdef
          280         /.notdef
          281         /.notdef
          282         /.notdef
          283         /space                % code 32 (octal 040)
          284         /exclam
          285         /quotedbl
          286         /numbersign
          287         /dollar
          288         /percent
          289         /ampersand
          290         /quoteright
          291         /parenleft
          292         /parenright
          293         /asterisk
          294         /plus
          295         /comma
          296         /minus
          297         /period
          298         /slash
          299         /zero                % code 48 (octal 060)
          300         /one
          301         /two
          302         /three
          303         /four
          304         /five
          305         /six
          306         /seven
          307         /eight
          308         /nine
          309         /colon
          310         /semicolon
          311         /less
          312         /equal
          313         /greater
          314         /question
          315         /at
          316         /A                % code 65 (octal 101)
          317         /B
          318         /C
          319         /D
          320         /E
          321         /F
          322         /G
          323         /H
          324         /I
          325         /J
          326         /K
          327         /L
          328         /M
          329         /N
          330         /O
          331         /P
          332         /Q
          333         /R
          334         /S
          335         /T
          336         /U
          337         /V
          338         /W
          339         /X
          340         /Y
          341         /Z
          342         /bracketleft
          343         /backslash
          344         /bracketright
          345         /asciicircum
          346         /underscore
          347         /quoteleft
          348         /a                % code 97 (octal 141)
          349         /b
          350         /c
          351         /d
          352         /e
          353         /f
          354         /g
          355         /h
          356         /i
          357         /j
          358         /k
          359         /l
          360         /m
          361         /n
          362         /o
          363         /p
          364         /q
          365         /r
          366         /s
          367         /t
          368         /u
          369         /v
          370         /w
          371         /x
          372         /y
          373         /z
          374         /braceleft
          375         /bar
          376         /braceright
          377         /asciitilde
          378         /.notdef                % code 127 (octal 177)
          379         /.notdef
          380         /.notdef
          381         /.notdef
          382         /.notdef
          383         /.notdef
          384         /.notdef
          385         /.notdef
          386         /.notdef
          387         /.notdef
          388         /.notdef
          389         /.notdef
          390         /.notdef
          391         /.notdef
          392         /.notdef
          393         /.notdef
          394         /.notdef
          395         /dotlessi                % code 144 (octal 220)
          396         /grave
          397         /acute
          398         /circumflex
          399         /tilde
          400         /macron
          401         /breve
          402         /dotaccent
          403         /dieresis
          404         /.notdef
          405         /ring
          406         /cedilla
          407         /.notdef
          408         /hungarumlaut
          409         /ogonek
          410         /caron
          411         /space                % code 160 (octal 240)
          412         /exclamdown
          413         /cent
          414         /sterling
          415         /currency
          416         /yen
          417         /brokenbar                % missing
          418         /section
          419         /dieresis
          420         /copyright
          421         /ordfeminine
          422         /guillemotleft
          423         /logicalnot
          424         /hyphen
          425         /registered
          426         /macron
          427         /degree                        % missing
          428         /plusminus                % missing
          429         /twosuperior                % missing
          430         /threesuperior                % missing
          431         /acute
          432         /mu                        % missing
          433         /paragraph
          434         /periodcentered
          435         /cedilla
          436         /onesuperior                % missing
          437         /ordmasculine
          438         /guillemotright
          439         /onequarter                % missing
          440         /onehalf                % missing
          441         /threequarters                % missing
          442         /questiondown
          443         /Agrave                % code 192 (octal 300)
          444         /Aacute
          445         /Acircumflex
          446         /Atilde
          447         /Adieresis
          448         /Aring
          449         /AE
          450         /Ccedilla
          451         /Egrave
          452         /Eacute
          453         /Ecircumflex
          454         /Edieresis
          455         /Igrave
          456         /Iacute
          457         /Icircumflex
          458         /Idieresis
          459         /Eth                        % missing
          460         /Ntilde
          461         /Ograve
          462         /Oacute
          463         /Ocircumflex
          464         /Otilde
          465         /Odieresis
          466         /multiply                % missing
          467         /Oslash
          468         /Ugrave
          469         /Uacute
          470         /Ucircumflex
          471         /Udieresis
          472         /Yacute                        % missing
          473         /Thorn                        % missing
          474         /germandbls
          475         /agrave                % code 224 (octal 340)
          476         /aacute
          477         /acircumflex
          478         /atilde
          479         /adieresis
          480         /aring
          481         /ae
          482         /ccedilla
          483         /egrave
          484         /eacute
          485         /ecircumflex
          486         /edieresis
          487         /igrave
          488         /iacute
          489         /icircumflex
          490         /idieresis
          491         /eth                        % missing
          492         /ntilde
          493         /ograve
          494         /oacute
          495         /ocircumflex
          496         /otilde
          497         /odieresis
          498         /divide                        % missing
          499         /oslash
          500         /ugrave
          501         /uacute
          502         /ucircumflex
          503         /udieresis
          504         /yacute                        % missing
          505         /thorn                        % missing
          506         /ydieresis                % code 255 (octal 377)
          507 ] def
          508 
          509 /NewFontDirectory FontDirectory maxlength dict def                % cache filled by the findfont wrapper; same capacity as FontDirectory
          510 
          511 %
          512 % findfont is apparently not guaranteed to be defined in systemdict, so the obvious
          513 %
          514 %        systemdict /findfont get exec
          515 %
          516 % can generate an error. So far the only known exception is a VT600 (version 48.0).
          517 %
          518 
          519 userdict /@RealFindfont known not {                % capture only once, so a later pass cannot save the wrapper itself
          520         userdict begin
          521                 /@RealFindfont systemdict begin /findfont load end def                % load searches systemdict first, then the rest of the dict stack
          522         end
          523 } if
          524 
          525 /findfont {                % name findfont: cached lookup that re-encodes StandardEncoding fonts to ISOLatin1Encoding
          526         dup NewFontDirectory exch known not {                % first request for this name
          527                 dup                % extra copy of the name, used as the cache key below
          528                 %dup systemdict /findfont get exec        % not always in systemdict
          529                 dup userdict /@RealFindfont get exec                % look the font up with the saved original findfont
          530                 dup /Encoding get StandardEncoding eq {                % fonts with any other encoding are cached unchanged
          531                         dup length dict begin
          532                                 {1 index /FID ne {def}{pop pop} ifelse} forall                % copy every entry except FID
          533                                 /Encoding ISOLatin1Encoding def
          534                                 currentdict
          535                         end
          536                         /DummyFontName exch definefont                % every re-encoded copy is registered under this one name
          537                 } if
          538                 NewFontDirectory 3 1 roll put                % cache the result under the requested name
          539         } if
          540         NewFontDirectory exch get                % answer from the cache
          541 } bind def
          542 
          543 setup                % apply the settings above and establish the page coordinate system
          544 2 setdecoding                % t = decodingdefs entry 2 (widthshow with per-space padding)
          545 %%EndSetup
          546 %%Page: 1 1
          547 /saveobj save def                % VM snapshot, restored after showpage at the end of the page
          548 mark                % removed again by cleartomark at the end of the page
          549 1 pagesetup                % records the page number; runs a per-page hook only if one is registered
          550 12 B f                % 12-point Times-Bold
          551 (Structural Regular Expressions)2 1622 1 2069 1230 t                % (string) nspaces width, count 1, x 2069, y 1230: string stretched to 1622 units
          552 10 I f
          553 (Rob Pike)1 363 1 2698 1470 t
          554 10 R f
          555 (AT&T Bell Laboratories)2 993 1 2383 1650 t
          556 (Murray Hill, New Jersey 07974)4 1267 1 2246 1770 t
          557 10 I f
          558 (ABSTRACT)2643 2150 w
          559 10 R f
          560 (The current)1 465 1 1330 2410 t
          561 9 R f
          562 (UNIX)1821 2410 w
          563 10 R f
          564 ( the built\255in concept of a)5 999(\256 text processing tools are weakened by)6 1635 2 2046 2410 t
          565 ( describe the `shape' of files when the typical)8 1908( is a simple notation that can)6 1222(line. There)1 470 3 1080 2530 t
          566 ( regular)1 316( Using)1 298( is regular expressions.)3 942( notation)1 361( That)1 241(array\255of\255lines picture is inadequate.)3 1442 6 1080 2650 t
          567 ( files has interesting)3 841(expressions to describe the structure in addition to the contents of)10 2759 2 1080 2770 t
          568 (applications, and yields elegant methods for dealing with some problems the current tools)12 3600 1 1080 2890 t
          569 ( are composed, the result is)5 1157( operations using these expressions)4 1464( When)1 303(handle clumsily.)1 676 4 1080 3010 t
          570 (reminiscent of shell pipelines.)3 1199 1 1080 3130 t
          571 10 B f
          572 (The Peter\255On\255Silicon Problem)2 1299 1 720 3490 t
          573 10 R f
          574 ( model,)1 301(In the traditional)2 666 2 970 3646 t
          575 9 R f
          576 (UNIX)1961 3646 w
          577 10 R f
          578 (text files are arrays of lines, and all the familiar tools)10 2120 1 2212 3646 t
          579 10 S1 f
          580 (\320)4358 3646 w
          581 10 CW f
          582 (grep)4484 3646 w
          583 10 R f
          584 (,)4724 3646 w
          585 10 CW f
          586 (sort)4775 3646 w
          587 10 R f
          588 (,)5015 3646 w
          589 10 CW f
          590 (awk)720 3766 w
          591 10 R f
          592 (, etc.)1 197 1 900 3766 t
          593 10 S1 f
          594 (\320)1128 3766 w
          595 10 R f
          596 ( of)1 113( output)1 287( The)1 211(expect arrays of lines as input.)5 1244 4 1259 3766 t
          597 10 CW f
          598 (ls)3144 3766 w
          599 10 R f
          600 (\(regardless of options\) is a list of files, one)8 1746 1 3294 3766 t
          601 (per line, that may be selected by tools such as)9 1825 1 720 3886 t
          602 10 CW f
          603 (grep)2570 3886 w
          604 10 R f
          605 (:)2810 3886 w
          606 10 CW f
          607 (ls \255l /usr/ken/bin | grep 'rws.*root')5 2220 1 1080 4066 t
          608 10 R f
          609 (\(I assume that the reader is familiar with the)8 1803 1 720 4246 t
          610 9 R f
          611 (UNIX)2551 4246 w
          612 10 R f
          613 ( model is powerful, but it is also pervasive,)8 1769(tools.\) The)1 464 2 2807 4246 t
          614 ( Many)1 298(sometimes overly so.)2 877 2 720 4366 t
          615 9 R f
          616 (UNIX)1933 4366 w
          617 10 R f
          618 ( more general, and more useful, if they could be)9 2041(programs would be)2 801 2 2198 4366 t
          619 ( example,)1 400( For)1 201(applied to arbitrarily structured input.)4 1549 3 720 4486 t
          620 10 CW f
          621 (diff)2907 4486 w
          622 10 R f
          623 ( C)1 105(could in principle report differences at the)6 1751 2 3184 4486 t
          624 ( if the interesting quantum of information isn't a line, most of)11 2537( But)1 202( level.)1 251(function level instead of the line)5 1330 4 720 4606 t
          625 (the tools \(including)2 804 1 720 4726 t
          626 10 CW f
          627 (diff)1562 4726 w
          628 10 R f
          629 ( solution so the line\255)4 873( perverting the)2 608( Worse,)1 348(\) don't help, or at best do poorly.)7 1409 4 1802 4726 t
          630 (oriented tools can implement it often obscures the original problem.)9 2714 1 720 4846 t
          631 ( consider the problem of turning)5 1320(To see how a line oriented view of text can introduce complication,)11 2750 2 970 5002 t
          632 ( input is an array of blank and non\255blank characters, like this:)11 2451( The)1 205(Peter into silicon.)2 703 3 720 5122 t
          633 10 CW f
          634 (#######)1320 5252 w
          635 (#########)1260 5322 w
          636 (#### #####)1 660 1 1200 5392 t
          637 ( #)1 180(#### ####)1 720 2 1140 5462 t
          638 (#### #####)1 840 1 1140 5532 t
          639 (#### ###)1 840 1 1080 5602 t
          640 (######## #####)1 960 1 1080 5672 t
          641 (#### #########)1 840 1 1080 5742 t
          642 ( ####)1 300( #)1 180(#### #)1 360 3 1080 5812 t
          643 ( ##)1 300( ###)1 300(## #)1 240 3 1080 5882 t
          644 ( ###)1 300(### #)1 480 2 1080 5952 t
          645 (### ##)1 540 1 1080 6022 t
          646 (## #)1 360 1 1140 6092 t
          647 (# ####)1 480 1 1200 6162 t
          648 (# #)1 180 1 1200 6232 t
          649 (## # ##)2 660 1 1080 6302 t
          650 10 R f
          651 (The output is to be statements in a language for laying out integrated circuits:)13 3094 1 720 6482 t
          652 10 CW f
          653 (rect minx miny maxx maxy)4 1440 1 1080 6662 t
          654 10 R f
          655 ( simplify the problem slightly,)4 1247( To)1 169(The statements encode where the non\255blank characters are in the input.)10 2904 3 720 6842 t
          656 (the coordinate system has)3 1032 1 720 6962 t
          657 10 I f
          658 (x)1778 6962 w
          659 10 R f
          660 (positive to the right and)4 954 1 1848 6962 t
          661 10 I f
          662 (y)2828 6962 w
          663 10 R f
          664 ( output need not be efficient in its)7 1346( The)1 206(positive down.)1 590 3 2898 6962 t
          665 (use of rectangles.)2 723 1 720 7082 t
          666 10 CW f
          667 (Awk)1507 7082 w
          668 10 R f
          669 ( which is a mixture of text processing and)8 1790(is the obvious language for the task,)6 1524 2 1726 7082 t
          670 ( the input is an array of lines, as)8 1345( Since)1 281(geometry, hence arithmetic.)2 1132 3 720 7202 t
          671 10 CW f
          672 (awk)3511 7202 w
          673 10 R f
          674 (expects, the job should be fairly)5 1316 1 3724 7202 t
          675 cleartomark                % discard everything down to and including the page's mark
          676 showpage                % emit page 1
          677 saveobj restore                % roll VM back to the snapshot taken at the start of the page
          678 %%EndPage: 1 1
          679 %%Page: 2 2
          680 /saveobj save def                % VM snapshot, restored after showpage at the end of the page
          681 mark                % removed again by cleartomark at the end of the page
          682 2 pagesetup                % records the page number; runs a per-page hook only if one is registered
          683 10 R f                % 10-point Times-Roman
          684 (\255 2 \255)2 166 1 2797 480 t                % page-number line; octal 255 (code 173) is /hyphen in ISOLatin1Encoding
          685 ( is an)2 211( Here)1 243(easy, and in fact it is.)5 846 3 720 840 t
          686 10 CW f
          687 (awk)2045 840 w
          688 10 R f
          689 (program for the job:)3 807 1 2250 840 t
          690 10 CW f
          691 (BEGIN{)1080 1020 w
          692 (y=1)1330 1140 w
          693 (})1080 1260 w
          694 (/^/{)1080 1380 w
          695 (for\(x=1; x<=length\($0\); x++\))2 1680 1 1330 1500 t
          696 (if\(substr\($0, x, 1\)=="#"\))2 1500 1 1580 1620 t
          697 (print "rect", x, y, x+1, y+1)5 1680 1 1830 1740 t
          698 (y++)1330 1860 w
          699 (})1080 1980 w
          700 10 R f
          701 (Although it is certainly easy to write, there is something odd about this program: the line\255driven nature of)17 4320 1 720 2160 t
          702 10 CW f
          703 (awk)720 2280 w
          704 10 R f
          705 (results in only one obvious advantage)5 1512 1 926 2280 t
          706 10 S1 f
          707 (\320)2464 2280 w
          708 10 R f
          709 (the ease of tracking)3 781 1 2590 2280 t
          710 10 CW f
          711 (y)3397 2280 w
          712 10 R f
          713 ( breaking out the pieces of)5 1056( task of)2 296(. The)1 231 3 3457 2280 t
          714 ( simple procedural code that does not use any advanced technology such as)12 3077(the line is left to explicit code,)6 1243 2 720 2400 t
          715 ( peculiarity becomes more evident if the problem is)8 2234( This)1 250( manipulation.)1 600(regular expressions for string)3 1236 4 720 2520 t
          716 (rephrased to demand that each horizontal run of rectangles be folded into a single rectangle:)14 3669 1 720 2640 t
          717 10 CW f
          718 (BEGIN{)1080 2820 w
          719 (y=1)1330 2940 w
          720 (})1080 3060 w
          721 (/^/{)1080 3180 w
          722 (for\(x=1; x<=length\($0\); x++\))2 1680 1 1330 3300 t
          723 (if\(substr\($0, x, 1\)=="#"\){)2 1560 1 1580 3420 t
          724 (x0=x;)1830 3540 w
          725 (while\(++x<=length\($0\) && substr\($0, x, 1\)=="#"\))4 2820 1 1830 3660 t
          726 (;)2080 3780 w
          727 (print "rect", x0, y, x, y+1)5 1620 1 1830 3900 t
          728 (})1580 4020 w
          729 (y++)1330 4140 w
          730 (})1080 4260 w
          731 10 R f
          732 ( In)1 133(Here a considerable amount of code is being spent to do a job a regular expression could do very simply.)19 4187 2 720 4440 t
          733 (fact, the only regular expression in the program is)8 2044 1 720 4560 t
          734 10 CW f
          735 (^)2796 4560 w
          736 10 R f
          737 ( ver\255)1 191( \(Newer)1 354( input.)1 262(, which is almost irrelevant to the)6 1377 4 2856 4560 t
          738 (sions of)1 324 1 720 4680 t
          739 10 CW f
          740 (awk)1079 4680 w
          741 10 R f
          742 (have mechanisms to use regular expressions within actions, but even there the relationship)12 3746 1 1294 4680 t
          743 (between the patterns that match text and the actions that manipulate the text is still too weak.\))16 3743 1 720 4800 t
          744 10 CW f
          745 (Awk's)970 4956 w
          746 10 R f
          747 (patterns)1302 4956 w
          748 10 S1 f
          749 (\320)1650 4956 w
          750 10 R f
          751 ( in slashes)2 427(the text)1 304 2 1782 4956 t
          752 10 CW f
          753 (//)2546 4956 w
          754 10 R f
          755 (that select the input on which to run the actions, the pro\255)11 2341 1 2699 4956 t
          756 ( braces)1 280(grams in the)2 498 2 720 5076 t
          757 10 CW f
          758 ({})1524 5076 w
          759 10 S1 f
          760 (\320)1670 5076 w
          761 10 R f
          762 ( But)1 196(pass to the actions the entire line containing the text matched by the pattern.)13 3048 2 1796 5076 t
          763 ( that)1 176( Imagine)1 378( can only be a line.)5 759(much of the power of this idea is being wasted, since the matched text)13 2801 4 720 5196 t
          764 10 CW f
          765 (awk)4860 5196 w
          766 10 R f
          767 ( so the patterns instead passed precisely the text they matched, with no implicit line bound\255)15 3761(were changed)1 559 2 720 5316 t
          768 ( first program could then be written:)6 1448(aries. Our)1 418 2 720 5436 t
          769 cleartomark                % discard everything down to and including the page's mark
          770 showpage                % emit page 2
          771 saveobj restore                % roll VM back to the snapshot taken at the start of the page
          772 %%EndPage: 2 2
          773 %%Page: 3 3
          774 /saveobj save def
          775 mark
          776 3 pagesetup
          777 10 R f
          778 (\255 3 \255)2 166 1 2797 480 t
          779 10 CW f
          780 (BEGIN{)1080 900 w
          781 (x=1)1330 1020 w
          782 (y=1)1330 1140 w
          783 (})1080 1260 w
          784 (/ /{)1 240 1 1080 1380 t
          785 (x++)1330 1500 w
          786 (})1080 1620 w
          787 (/#/{)1080 1740 w
          788 (print "rect", x, x+1, y, y+1)5 1680 1 1330 1860 t
          789 (x++)1330 1980 w
          790 (})1080 2100 w
          791 (/\\n/{)1080 2220 w
          792 (x=1)1330 2340 w
          793 (y++)1330 2460 w
          794 (})1080 2580 w
          795 10 R f
          796 ( regular expressions to break out complete strings of blanks and)10 2606(and the second version could use)5 1342 2 720 2760 t
          797 10 CW f
          798 (#)4699 2760 w
          799 10 R f
          800 ('s sim\255)1 281 1 4759 2760 t
          801 (ply:)720 2880 w
          802 10 CW f
          803 (BEGIN{)1080 3060 w
          804 (x=1)1330 3180 w
          805 (y=1)1330 3300 w
          806 (})1080 3420 w
          807 (/ +/{)1 300 1 1080 3540 t
          808 (x+=length\($0\))1330 3660 w
          809 (})1080 3780 w
          810 (/#+/{)1080 3900 w
          811 (print "rect", x, x+length\($0\), y, y+1)5 2220 1 1330 4020 t
          812 (x+=length\($0\))1330 4140 w
          813 (})1080 4260 w
          814 (/\\n/{)1080 4380 w
          815 (x=1)1330 4500 w
          816 (y++)1330 4620 w
          817 (})1080 4740 w
          818 10 R f
          819 ( are)1 148(In these programs, regular expressions are being used to do more than just select the input, the way they)18 4172 2 720 4920 t
          820 (used in all the traditional)4 1050 1 720 5040 t
          821 9 R f
          822 (UNIX)1806 5040 w
          823 10 R f
          824 ( the expressions are doing a simple parsing \(or at least a)11 2375(tools. Instead,)1 596 2 2069 5040 t
          825 ( expressions are called)3 900( Such)1 250(breaking into lexical tokens\) of the input.)6 1651 3 720 5160 t
          826 10 I f
          827 (structural regular expressions)2 1213 1 3547 5160 t
          828 10 R f
          829 (or just)1 254 1 4786 5160 t
          830 10 I f
          831 (structural expressions.)1 911 1 720 5280 t
          832 10 R f
          833 ( notably shorter than the originals, but they are conceptually simpler, because)11 3125(These programs are not)3 945 2 970 5436 t
          834 ( The)1 208(the structure of the input is expressed in the structure of the programs, rather than in procedural code.)17 4112 2 720 5556 t
          835 ( between the patterns and the actions: the patterns select portions of the input)13 3102(labor has been cleanly divided)4 1218 2 720 5676 t
          836 ( actions contain no code to disassemble the input.)8 1979( The)1 205(while the actions operate on them.)5 1370 3 720 5796 t
          837 (The lexical analysis generator)3 1233 1 970 5952 t
          838 10 CW f
          839 (lex)2241 5952 w
          840 10 R f
          841 ( but its)2 301(uses regular expressions to define the structure of text,)8 2280 2 2459 5952 t
          842 ( \(its output must be run through the C)8 1594(implementation is poor, and since it is not an interactive program)10 2726 2 720 6072 t
          843 ( conve\255)1 302( even ignoring issues of speed and)6 1400( But)1 200(compiler\) it has largely been forgotten as a day\255to\255day tool.)9 2418 4 720 6192 t
          844 (nience,)720 6312 w
          845 10 CW f
          846 (lex)1041 6312 w
          847 10 R f
          848 ( the next)2 364( As)1 171( structural expressions.)2 938(still misses out on one of the most important aspects of)10 2310 4 1257 6312 t
          849 ( be nested to describe the structure of a file recursively, with)11 2510(section illustrates, structural expressions can)4 1810 2 720 6432 t
          850 (surprising results.)1 711 1 720 6552 t
          851 10 B f
          852 (Interactive Text Editing)2 1027 1 720 6792 t
          853 10 R f
          854 (It is ironic that)3 589 1 970 6948 t
          855 9 R f
          856 (UNIX)1583 6948 w
          857 10 R f
          858 ( typ\255)1 188(files are uninterpreted byte streams, yet the style of programming that most)11 3018 2 1834 6948 t
          859 (ifies)720 7068 w
          860 9 R f
          861 (UNIX)925 7068 w
          862 10 R f
          863 ( imposed on files)3 713(has a fairly rigid structure)4 1071 2 1185 7068 t
          864 10 S1 f
          865 (\320)3003 7068 w
          866 10 R f
          867 ( silent limits)2 514( \(The)1 247(arrays of not\255too\255long lines.)3 1142 3 3137 7068 t
          868 ( the)1 153( Although)1 434( line lengths by most tools can be frustrating.\))8 1883(placed on)1 390 4 720 7188 t
          869 10 CW f
          870 (awk)3611 7188 w
          871 10 R f
          872 (variant introduced above does)3 1218 1 3822 7188 t
          873 (not exist, there is an interactive text editor,)7 1706 1 720 7308 t
          874 10 CW f
          875 (sam)2451 7308 w
          876 10 R f
          877 (, that treats its files as simple byte streams.)8 1710 1 2631 7308 t
          878 cleartomark
          879 showpage
          880 saveobj restore
          881 %%EndPage: 3 3
          882 %%Page: 4 4
          883 /saveobj save def
          884 mark
          885 4 pagesetup
          886 10 R f
          887 (\255 4 \255)2 166 1 2797 480 t
          888 (The)970 840 w
          889 10 CW f
          890 (sam)1153 840 w
          891 10 R f
          892 (command language looks much like that of)6 1744 1 1361 840 t
          893 10 CW f
          894 (ed)3133 840 w
          895 10 R f
          896 (, but the details are different because)6 1483 1 3253 840 t
          897 10 CW f
          898 (sam)4764 840 w
          899 10 R f
          900 (is)4973 840 w
          901 ( example, the simple address)4 1151( For)1 189(not line\255oriented.)1 688 3 720 960 t
          902 10 CW f
          903 (/string/)1080 1140 w
          904 10 R f
          905 ( there are short\255)3 646( Although)1 434( not the next line containing ``string''.)6 1565(matches the next occurrence of ``string'',)5 1675 4 720 1320 t
          906 (hands to simplify common actions, the idea of a line must be stated explicitly in)14 3196 1 720 1440 t
          907 10 CW f
          908 (sam)3941 1440 w
          909 10 R f
          910 (.)4121 1440 w
          911 10 CW f
          912 (Sam)970 1596 w
          913 10 R f
          914 (has the same simple text addition and modification commands)8 2509 1 1177 1596 t
          915 10 CW f
          916 (ed)3713 1596 w
          917 10 R f
          918 (has:)3860 1596 w
          919 10 CW f
          920 (a)4048 1596 w
          921 10 R f
          922 (adds text after the cur\255)4 905 1 4135 1596 t
          923 (rent location,)1 527 1 720 1716 t
          924 10 CW f
          925 (i)1272 1716 w
          926 10 R f
          927 (adds text before it,)3 743 1 1357 1716 t
          928 10 CW f
          929 (d)2125 1716 w
          930 10 R f
          931 (deletes it, and)2 552 1 2210 1716 t
          932 10 CW f
          933 (c)2787 1716 w
          934 10 R f
          935 (replaces it.)1 432 1 2872 1716 t
          936 (Unlike in)1 376 1 970 1872 t
          937 10 CW f
          938 (ed)1372 1872 w
          939 10 R f
          940 (, the current location in)4 933 1 1492 1872 t
          941 10 CW f
          942 (sam)2451 1872 w
          943 10 R f
          944 ( simplifies some)2 660( This)1 230( \(and usually isn't\) a line.)5 1031(need not be)2 462 4 2657 1872 t
          945 ( example,)1 397( For)1 198(operations considerably.)1 985 3 720 1992 t
          946 10 CW f
          947 (ed)2334 1992 w
          948 10 R f
          949 ( a file.)2 268(has several ways to delete all occurrences of a string in)10 2284 2 2488 1992 t
          950 (One method is)2 583 1 720 2112 t
          951 10 CW f
          952 (g/string/ s///g)1 900 1 1080 2292 t
          953 10 R f
          954 ( substitute command is used to delete text within a line, while a delete command is)15 3369(It is symptomatic that a)4 951 2 720 2472 t
          955 ( deleted contains a newline, this technique doesn't work.)8 2271( if the string to be)5 718( Also,)1 266(used to delete whole lines.)4 1065 4 720 2592 t
          956 ( just an array of characters, but some characters are more equal than others.\))13 3310(\(A file is)2 395 2 720 2712 t
          957 10 CW f
          958 (Sam)4496 2712 w
          959 10 R f
          960 (is more)1 318 1 4722 2712 t
          961 (forthright:)720 2832 w
          962 10 CW f
          963 (x/string/d)1080 3012 w
          964 10 R f
          965 (The)720 3192 w
          966 10 CW f
          967 (x)905 3192 w
          968 10 R f
          969 ( runs the subsequent command)4 1256(\(`extract'\) command searches for each occurrence of the pattern, and)9 2789 2 995 3192 t
          970 ( that this is subtly different)5 1075( Note)1 244( \(not to the line containing the match\).)7 1532(with the current text set to the match)7 1469 4 720 3312 t
          971 (from)720 3432 w
          972 10 CW f
          973 (ed)940 3432 w
          974 10 R f
          975 ('s)1060 3432 w
          976 10 CW f
          977 (g)1159 3432 w
          978 10 R f
          979 (command:)1246 3432 w
          980 10 CW f
          981 (x)1695 3432 w
          982 10 R f
          983 (extracts the complete text for the command,)6 1767 1 1782 3432 t
          984 10 CW f
          985 (g)3576 3432 w
          986 10 R f
          987 ( is also)2 282( There)1 284(merely selects lines.)2 811 3 3663 3432 t
          988 (a complement to)2 666 1 720 3552 t
          989 10 CW f
          990 (x)1411 3552 w
          991 10 R f
          992 (, called)1 288 1 1471 3552 t
          993 10 CW f
          994 (y)1784 3552 w
          995 10 R f
          996 (, that extracts the pieces)4 956 1 1844 3552 t
          997 10 I f
          998 (between)2825 3552 w
          999 10 R f
         1000 (the matches of the pattern.)4 1056 1 3177 3552 t
         1001 (The)970 3708 w
         1002 10 CW f
         1003 (x)1151 3708 w
         1004 10 R f
         1005 (command is a loop, and)4 956 1 1237 3708 t
         1006 10 CW f
         1007 (sam)2220 3708 w
         1008 10 R f
         1009 (has a corresponding conditional command, called)5 1990 1 2427 3708 t
         1010 10 CW f
         1011 (g)4444 3708 w
         1012 10 R f
         1013 (\(unrelated to)1 509 1 4531 3708 t
         1014 10 CW f
         1015 (ed)720 3828 w
         1016 10 R f
         1017 ('s)840 3828 w
         1018 10 CW f
         1019 (g)937 3828 w
         1020 10 R f
         1021 (\):)997 3828 w
         1022 10 CW f
         1023 (g/pattern/command)1080 4008 w
         1024 10 R f
         1025 ( that it does not loop, and it does not change)10 1783( Note)1 246( matches the pattern.)3 832(runs the command if the current text)6 1459 4 720 4188 t
         1026 ( lines con\255)2 424( the command to print all)5 1033( Hence)1 309(the current text; it merely selects whether a command will run.)10 2554 4 720 4308 t
         1027 (taining a string is)3 692 1 720 4428 t
         1028 10 CW f
         1029 (x/.*\\n/ g/string/p)1 1080 1 1080 4608 t
         1030 10 S1 f
         1031 (\320)720 4788 w
         1032 10 R f
         1033 ( reverse conditional is)3 891( The)1 209( contains the string.)3 795(extract all the lines, and print each one that)8 1740 4 848 4788 t
         1034 10 CW f
         1035 (v)4512 4788 w
         1036 10 R f
         1037 (, so to print)3 468 1 4572 4788 t
         1038 (all lines containing `rob' but not `robot':)6 1621 1 720 4908 t
         1039 10 CW f
         1040 (x/.*\\n/ g/rob/ v/robot/p)2 1440 1 1080 5088 t
         1041 10 R f
         1042 (A more dramatic example is to capitalize all occurrences of words `i':)11 2790 1 720 5268 t
         1043 10 CW f
         1044 (x/[A\255Za\255z]+/ g/i/ v/../ c/I/)3 1680 1 1080 5448 t
         1045 10 S1 f
         1046 (\320)720 5628 w
         1047 10 R f
         1048 ( more characters, and change the)5 1316(extract all the words, find those that contain `i', reject those with two or)13 2878 2 846 5628 t
         1049 ( people have overcome the dif\255)5 1253( Some)1 282(string to `I' \(borrowing a little syntax from the substitute command\).)10 2785 3 720 5748 t
         1050 ( expressions,)1 530(ficulty of selecting words or identifiers using regular expressions by adding notation to the)13 3790 2 720 5868 t
         1051 ( the precise definition of `identifier' is immutable in the implementation.)10 3006(which has the disadvantage that)4 1314 2 720 5988 t
         1052 (With)720 6108 w
         1053 10 CW f
         1054 (sam)945 6108 w
         1055 10 R f
         1056 (, the definition is part of the program and easy to change, although more long\255winded.)14 3442 1 1125 6108 t
         1057 (The program to capitalize `i's should be writable as)8 2057 1 970 6264 t
         1058 10 CW f
         1059 (x/[A\255Za\255z]+/ g/^i$/ c/I/)2 1440 1 1080 6444 t
         1060 10 R f
         1061 (That is, the definition of)4 977 1 720 6624 t
         1062 10 CW f
         1063 (^)1724 6624 w
         1064 10 R f
         1065 (and)1811 6624 w
         1066 10 CW f
         1067 ($)1982 6624 w
         1068 10 R f
         1069 ( compatibility and because of)4 1188( For)1 192( input.)1 259(should reflect the structure of the)5 1332 4 2069 6624 t
         1070 (some problems in the implementation, however,)5 1929 1 720 6744 t
         1071 10 CW f
         1072 (^)2674 6744 w
         1073 10 R f
         1074 (and)2759 6744 w
         1075 10 CW f
         1076 ($)2928 6744 w
         1077 10 R f
         1078 (in)3013 6744 w
         1079 10 CW f
         1080 (sam)3116 6744 w
         1081 10 R f
         1082 (always match line boundaries.)3 1209 1 3321 6744 t
         1083 (In)970 6900 w
         1084 10 CW f
         1085 (ed)1078 6900 w
         1086 10 R f
         1087 ( each global is still)4 754(, it would not be very useful to nest global commands because the `output' of)14 3088 2 1198 6900 t
         1088 ( However,)1 445(a line.)1 249 2 720 7020 t
         1089 10 CW f
         1090 (sam)1444 7020 w
         1091 10 R f
         1092 ( benefit comes from separating)4 1256( \(This)1 266('s extract commands can be nested effectively.)6 1894 3 1624 7020 t
         1093 ( problem of changing all occurrences of the variable)8 2131( the)1 152( Consider)1 416(the notions of looping and matching.\))5 1530 4 720 7140 t
         1094 10 CW f
         1095 (n)4980 7140 w
         1096 10 R f
         1097 (in a C program to some other name, say)8 1595 1 720 7260 t
         1098 10 CW f
         1099 (num)2340 7260 w
         1100 10 R f
         1101 ( method above will work)4 999(. The)1 230 2 2520 7260 t
         1102 10 S1 f
         1103 (\320)3774 7260 w
         1104 cleartomark
         1105 showpage
         1106 saveobj restore
         1107 %%EndPage: 4 4
         1108 %%Page: 5 5
         1109 /saveobj save def
         1110 mark
         1111 5 pagesetup
         1112 10 R f
         1113 (\255 5 \255)2 166 1 2797 480 t
         1114 10 CW f
         1115 (x/[a\255zA\255Z0\2559]+/ g/n/ v/../ c/num/)3 1980 1 1080 900 t
         1116 10 S1 f
         1117 (\320)720 1080 w
         1118 10 R f
         1119 ( are places in C where the `identifier')7 1508(except that there)2 663 2 847 1080 t
         1120 10 CW f
         1121 (n)3046 1080 w
         1122 10 R f
         1123 (occurs but not as a variable, in particular as the)9 1906 1 3134 1080 t
         1124 (constant)720 1200 w
         1125 10 CW f
         1126 (\\n)1081 1200 w
         1127 10 R f
         1128 ( cou\255)1 204( prevent incorrect changes, the command can be prefixed by a)10 2503( To)1 164(in characters or strings.)3 940 4 1229 1200 t
         1129 (ple of)1 230 1 720 1320 t
         1130 10 CW f
         1131 (y)975 1320 w
         1132 10 R f
         1133 (commands to weed out characters and strings:)6 1841 1 1060 1320 t
         1134 10 CW f
         1135 (y/".*"/ y/'.*'/ x/[a\255zA\255Z0\2559]+/ g/n/ v/../ c/num/)5 2940 1 1080 1500 t
         1136 10 R f
         1137 (This example illustrates the power of composing extractions and conditionals, but it is not artificial: it)15 4070 1 970 1716 t
         1138 (was encountered when editing a real program \(in fact,)8 2192 1 720 1836 t
         1139 10 CW f
         1140 (sam)2942 1836 w
         1141 10 R f
         1142 ( with shell pipe\255)3 659( is an obvious analogy)4 914(\). There)1 345 3 3122 1836 t
         1143 (lines, but these command)3 1019 1 720 1956 t
         1144 10 I f
         1145 (chains)1765 1956 w
         1146 10 R f
         1147 (are subtly)1 393 1 2052 1956 t
         1148 10 S1 f
         1149 (\320)2472 1956 w
         1150 10 R f
         1151 (and importantly)1 638 1 2599 1956 t
         1152 10 S1 f
         1153 (\320)3264 1956 w
         1154 10 R f
         1155 ( flows into)2 432( Data)1 240(different from pipelines.)2 977 3 3391 1956 t
         1156 ( chains, the data flow is implicit:)6 1338( In)1 138( pipeline and emerges transformed from the right end.)8 2194(the left end of a)4 650 4 720 2076 t
         1157 ( commands are operating on the same data \(except that the last element of the chain may modify the)18 4070(all the)1 250 2 720 2196 t
         1158 ( is being)2 345( What)1 269( flows through the chain.)4 1008(text\); the complete operation is done in place; and no data actually)11 2698 4 720 2316 t
         1159 ( in the)2 262(passed from link to link in the chain is a view of the data, until it looks right for the final command)21 4058 2 720 2436 t
         1160 ( data stays the same, only the structure is modified.)9 2045(chain. The)1 446 2 720 2556 t
         1161 10 B f
         1162 (More than one line, and less than one line)8 1771 1 720 2796 t
         1163 10 R f
         1164 (The standard)1 532 1 970 2952 t
         1165 9 R f
         1166 (UNIX)1539 2952 w
         1167 10 R f
         1168 (tools have difficulty handling several lines at a time, if they can do so at all.)15 3237 1 1803 2952 t
         1169 10 CW f
         1170 (Grep)720 3072 w
         1171 10 R f
         1172 (,)960 3072 w
         1173 10 CW f
         1174 (sort)1022 3072 w
         1175 10 R f
         1176 (and)1299 3072 w
         1177 10 CW f
         1178 (diff)1480 3072 w
         1179 10 R f
         1180 ( if they could operate on larger)6 1296(work on lines only, although it would be useful)8 1987 2 1757 3072 t
         1181 ( as a)2 197(pieces, such)1 491 2 720 3192 t
         1182 10 CW f
         1183 (refer)1443 3192 w
         1184 10 R f
         1185 (database.)1778 3192 w
         1186 10 CW f
         1187 (awk)2206 3192 w
         1188 10 R f
         1189 (can be tricked into accepting multiple\255line records, but then the)9 2619 1 2421 3192 t
         1190 ( sub\255pieces \(typically ordinary lines\) by explicit code.)7 2236(actions must break out the)4 1101 2 720 3312 t
         1191 10 CW f
         1192 (sed)4119 3312 w
         1193 10 R f
         1194 (has a unique and)3 704 1 4336 3312 t
         1195 (clumsy mechanism for manipulating multiple lines, which few have mastered.)9 3127 1 720 3432 t
         1196 ( a)1 84( Consider)1 426(Structural expressions make it easy to specify multiple\255line actions.)8 2812 3 970 3588 t
         1197 10 CW f
         1198 (refer)4332 3588 w
         1199 10 R f
         1200 (database,)4672 3588 w
         1201 ( percent sign and)3 685( line of a record begins with a)7 1210( Each)1 252(which has multi\255line records separated by blank lines.)7 2173 4 720 3708 t
         1202 ( the line:)2 366(a character indicating the type of information on)7 1981 2 720 3828 t
         1203 10 CW f
         1204 (A)3100 3828 w
         1205 10 R f
         1206 (for author,)1 429 1 3193 3828 t
         1207 10 CW f
         1208 (T)3655 3828 w
         1209 10 R f
         1210 ( with)1 211( Staying)1 364(for title, etc.)2 504 3 3748 3828 t
         1211 10 CW f
         1212 (sam)4860 3828 w
         1213 10 R f
         1214 (notation, the command to search a)5 1370 1 720 3948 t
         1215 10 CW f
         1216 (refer)2115 3948 w
         1217 10 R f
         1218 (database for all papers written by Bimmler is:)7 1828 1 2440 3948 t
         1219 10 CW f
         1220 (x/\(.+\\n\)+/ g/%A.*Bimmler/p)1 1560 1 1080 4128 t
         1221 10 S1 f
         1222 (\320)720 4308 w
         1223 10 R f
         1224 ( set of lines containing `Bimm\255)5 1257(break the file into non\255empty sequences of non\255empty lines and print any)11 2937 2 846 4308 t
         1225 ( be compatible with the other tools, a `)8 1572( \(To)1 198( after `%A'.)2 486(ler' on a line)3 522 4 720 4428 t
         1226 10 CW f
         1227 (.)3498 4428 w
         1228 10 R f
         1229 ( Except)1 331(' does not match a newline.\))5 1151 2 3558 4428 t
         1230 (for the structural expression, this is a regular)7 1836 1 720 4548 t
         1231 10 CW f
         1232 (grep)2589 4548 w
         1233 10 R f
         1234 ( that)1 184(operation, implying)1 797 2 2862 4548 t
         1235 10 CW f
         1236 (grep)3877 4548 w
         1237 10 R f
         1238 (could benefit from an)3 889 1 4151 4548 t
         1239 ( `stream)1 339( the short term, however, a)5 1147( In)1 149(additional regular expression to define the structure of its input.)9 2685 4 720 4668 t
         1240 10 CW f
         1241 (sam)720 4788 w
         1242 10 R f
         1243 (,' analogous to)2 591 1 900 4788 t
         1244 10 CW f
         1245 (sed,)1516 4788 w
         1246 10 R f
         1247 (would be convenient, and is currently being implemented.)7 2322 1 1781 4788 t
         1248 ( example, we can)3 713( For)1 196( search program.)2 681(The ability to compose expressions makes it easy to tune the)10 2480 4 970 4944 t
         1249 (select just the)2 544 1 720 5064 t
         1250 10 I f
         1251 (titles)1289 5064 w
         1252 10 R f
         1253 (of the papers written by Bimmler by applying another extraction:)9 2605 1 1509 5064 t
         1254 10 CW f
         1255 (x/\(.+\\n\)+/ g/%A.*Bimmler/ x/.*\\n/ g/%T/p)3 2400 1 1080 5244 t
         1256 10 R f
         1257 ( into individual lines, then prints the lines con\255)8 1912(This program breaks the records with author Bimmler back)8 2408 2 720 5424 t
         1258 (taining)720 5544 w
         1259 10 CW f
         1260 (%T)1023 5544 w
         1261 10 R f
         1262 (.)1143 5544 w
         1263 ( examples of multiple\255line components of files that may profitably be extracted,)11 3212(There are many other)3 858 2 970 5700 t
         1264 (such as C functions, messages in mail boxes, paragraphs in)9 2415 1 720 5820 t
         1265 10 CW f
         1266 (troff)3166 5820 w
         1267 10 R f
         1268 ( records in on\255line telephone)4 1162(input and)1 381 2 3497 5820 t
         1269 ( that, unlike in systems that define file structures)8 1948(books. Note)1 509 2 720 5940 t
         1270 10 I f
         1271 (a priori)1 310 1 3203 5940 t
         1272 10 R f
         1273 (, the structures are applied by the pro\255)7 1527 1 3513 5940 t
         1274 ( sometimes a C)3 644( means the structure can change from application to application;)9 2658( This)1 239(gram, not the data.)3 779 4 720 6060 t
         1275 ( and sometimes it is just a byte)7 1343(program is an array of functions, but sometimes it is an array of lines,)13 2977 2 720 6180 t
         1276 (stream.)720 6300 w
         1277 ( determine the appearance of their input,)6 1627(If the standard commands admitted a structural expression to)8 2443 2 970 6456 t
         1278 ( a version of)3 553(many currently annoying problems could become simple: imagine)7 2790 2 720 6576 t
         1279 10 CW f
         1280 (diff)4107 6576 w
         1281 10 R f
         1282 (that could print)2 649 1 4391 6576 t
         1283 ( or functions instead of changed lines, or a)8 1710(changed sentences)1 740 2 720 6696 t
         1284 10 CW f
         1285 (sort)3197 6696 w
         1286 10 R f
         1287 (that could sort a)3 647 1 3464 6696 t
         1288 10 CW f
         1289 (refer)4138 6696 w
         1290 10 R f
         1291 (database. The)1 575 1 4465 6696 t
         1292 (case of)1 281 1 720 6816 t
         1293 10 CW f
         1294 (sort)1028 6816 w
         1295 10 R f
         1296 ( the input records be described by a struc\255)8 1676(is particularly interesting: not only can the shape of)8 2069 2 1295 6816 t
         1297 ( current bewildering maze of options to control the)8 2062( The)1 209(tural expression, but also the shape of the sort key.)9 2049 3 720 6936 t
         1298 ( be largely replaced by a structural expression to extract the key from the record, with)15 3434(sort could in principle)3 886 2 720 7056 t
         1299 (multiple expressions to define multiple keys.)5 1794 1 720 7176 t
         1300 cleartomark
         1301 showpage
         1302 saveobj restore
         1303 %%EndPage: 5 5
         1304 %%Page: 6 6
         1305 /saveobj save def
         1306 mark
         1307 6 pagesetup
         1308 10 R f
         1309 (\255 6 \255)2 166 1 2797 480 t
         1310 10 B f
         1311 (The)720 840 w
         1312 10 CW f
         1313 (awk)912 840 w
         1314 10 B f
         1315 (of the future?)2 582 1 1117 840 t
         1316 10 R f
         1317 (It is entertaining to imagine a version of)7 1622 1 970 996 t
         1318 10 CW f
         1319 (awk)2621 996 w
         1320 10 R f
         1321 ( as discussed)2 524( First,)1 263(that applies these ideas throughout.)4 1423 3 2830 996 t
         1322 ( For)1 199( to the actions would be defined, rather than merely selected, by the patterns.)13 3196(earlier, the text passed)3 925 3 720 1116 t
         1323 (example,)720 1236 w
         1324 10 CW f
         1325 (/#+/ { print })3 840 1 1080 1416 t
         1326 10 R f
         1327 (would print only)2 667 1 720 1596 t
         1328 10 CW f
         1329 (#)1412 1596 w
         1330 10 R f
         1331 (characters; conventional)1 972 1 1497 1596 t
         1332 10 CW f
         1333 (awk)2494 1596 w
         1334 10 R f
         1335 (would instead print every line containing)5 1640 1 2699 1596 t
         1336 10 CW f
         1337 (#)4364 1596 w
         1338 10 R f
         1339 (characters.)4449 1596 w
         1340 ( of using the restrictive idea of a)7 1317( Instead)1 342( parsed.)1 314(Next, the expressions would define how the input is)8 2097 4 970 1752 t
         1341 ( instance, in)2 496( For)1 197( demarcate fields.)2 722(field separator, the iterations implied by closures in the expression can)10 2905 4 720 1872 t
         1342 (the program)1 485 1 720 1992 t
         1343 10 CW f
         1344 (/\(.+\\n\)+/ {)1 660 1 1080 2172 t
         1345 10 I f
         1346 (action)1800 2172 w
         1347 10 CW f
         1348 (})2110 2172 w
         1349 10 R f
         1350 ( lines, but the outermost closure \(the)6 1481(the action sees groups of)4 999 2 720 2352 t
         1351 10 CW f
         1352 (+)3229 2352 w
         1353 10 R f
         1354 (operator\) examines, and hence can extract,)5 1722 1 3318 2352 t
         1355 (the individual lines.)2 802 1 720 2472 t
         1356 10 CW f
         1357 (ed)1577 2472 w
         1358 10 R f
         1359 ( We)1 192( back\255referencing operators.)2 1128(uses parentheses to define sub\255expressions for its)6 1993 3 1727 2472 t
         1360 ( to define the `fields' in)5 980(can modify this idea)3 834 2 720 2592 t
         1361 10 CW f
         1362 (awk)2567 2592 w
         1363 10 R f
         1364 (, so)1 147 1 2747 2592 t
         1365 10 CW f
         1366 ($1)2927 2592 w
         1367 10 R f
         1368 (defines the first element of the closure \(the first)8 1960 1 3080 2592 t
         1369 (line\),)720 2712 w
         1370 10 CW f
         1371 ($2)961 2712 w
         1372 10 R f
         1373 ( arrays, so the)3 575( interestingly, the closures could generate indices for)7 2163( More)1 274(the second, and so on.)4 914 4 1114 2712 t
         1374 ( say,)1 191(fields would be called,)3 925 2 720 2832 t
         1375 10 CW f
         1376 (input[1])1869 2832 w
         1377 10 R f
         1378 (and so on, perhaps with the unadorned identifier)7 1986 1 2382 2832 t
         1379 10 CW f
         1380 (input)4401 2832 w
         1381 10 R f
         1382 (holding)4734 2832 w
         1383 ( generate multi\255dimensional)2 1163( has the advantage that nested closures can)7 1858( This)1 250(the original intact string.)3 1049 4 720 2952 t
         1384 ( is some subtlety involving the relationship between)7 2192( \(There)1 331(arrays, which is notationally clean.)4 1456 3 720 3072 t
         1385 10 CW f
         1386 (input)4740 3072 w
         1387 10 R f
         1388 (indices and the order of the closures in the pattern, but the details are not important here.\))16 3571 1 720 3192 t
         1389 (Finally, as in)2 524 1 970 3348 t
         1390 10 CW f
         1391 (sam)1521 3348 w
         1392 10 R f
         1393 ( expressions would be applicable to the output of structural expressions;)10 2910(, structural)1 429 2 1701 3348 t
         1394 ( following program computes)3 1185( The)1 205( actions.)1 333(that is, we would be able to nest structural expressions inside the)11 2597 4 720 3468 t
         1395 (how many pages of articles Bimmler has written:)7 1967 1 720 3588 t
         1396 10 CW f
         1397 ( break into records)3 1140(/\(.+\\n\)+/{ #)1 900 2 1080 3768 t
         1398 ( is Bimmler author? \(see text\))5 1800( #)1 300(input ~ /%A.*Bimmler/{)2 1320 3 1330 3888 t
         1399 ( extract page numbers)3 1260(/%P.*\([0\2559]+\)\255\([0\2559]+\)/{ #)1 1740 2 1580 4008 t
         1400 (pages+=input[2]\255input[1]+1)1830 4128 w
         1401 (})1580 4248 w
         1402 (})1330 4368 w
         1403 (})1080 4488 w
         1404 (END{)1080 4608 w
         1405 (print pages)1 660 1 1330 4728 t
         1406 (})1080 4848 w
         1407 10 R f
         1408 (Real)720 5028 w
         1409 10 CW f
         1410 (awk)935 5028 w
         1411 10 R f
         1412 ( \(that is, regular expressions\) only like)6 1582(uses patterns)1 520 2 1147 5028 t
         1413 10 CW f
         1414 (sam)3282 5028 w
         1415 10 R f
         1416 ('s)3462 5028 w
         1417 10 CW f
         1418 (g)3567 5028 w
         1419 10 R f
         1420 (command, but our)2 746 1 3660 5028 t
         1421 10 CW f
         1422 (awk)4439 5028 w
         1423 10 R f
         1424 ('s patterns)1 421 1 4619 5028 t
         1425 (are)720 5148 w
         1426 10 CW f
         1427 (x)871 5148 w
         1428 10 R f
         1429 ( is why in the pro\255)5 750( This)1 232( we need both to exploit structural expressions well.)8 2110(expressions. Obviously,)1 987 4 961 5148 t
         1430 (gram above the test for whether)5 1276 1 720 5268 t
         1431 10 CW f
         1432 (input)2024 5268 w
         1433 10 R f
         1434 (contains a paper by Bimmler must be written as an explicit pattern)11 2688 1 2352 5268 t
         1435 ( separated by a dash, which is how)7 1393( innermost pattern searches for lines containing two numbers)8 2451(match. The)1 476 3 720 5388 t
         1436 10 CW f
         1437 (refer)720 5508 w
         1438 10 R f
         1439 (stores the starting and ending pages of the article.)8 1977 1 1045 5508 t
         1440 ( real)1 178( The)1 209(This is a contrived example, of course, but it illustrates the basic ideas.)12 2875 3 970 5664 t
         1441 10 CW f
         1442 (awk)4261 5664 w
         1443 10 R f
         1444 (suffers from a)2 569 1 4471 5664 t
         1445 ( making the parsing actions of the)6 1366( would be improved by)4 939( It)1 114(mismatch between the patterns and the actions.)6 1901 4 720 5784 t
         1446 ( lan\255)1 185( A)1 127( pattern\255matching abilities available in the actions.)6 2047(patterns visible in the actions, and by having the)8 1961 4 720 5904 t
         1447 (guage with regular expressions should not base its text manipulation on a)11 2928 1 720 6024 t
         1448 10 CW f
         1449 (substr)3673 6024 w
         1450 10 R f
         1451 (function.)4058 6024 w
         1452 10 B f
         1453 (Comments)720 6264 w
         1454 10 R f
         1455 ( is a powerful and convenient, if unfa\255)7 1545(The use of regular expressions to describe the structure of files)10 2525 2 970 6420 t
         1456 ( current)1 308(miliar, way to address a number of difficulties the)8 2010 2 720 6540 t
         1457 9 R f
         1458 (UNIX)3062 6540 w
         1459 10 R f
         1460 ( is obviously around this)4 988( There)1 283(tools share.)1 456 3 3313 6540 t
         1461 ( all.)1 172(new notation a number of interesting problems, and I am not pretending to have addressed them)15 4148 2 720 6660 t
         1462 ( the possibilities,)2 678(Rather, I have skipped enthusiastically from example to example to indicate the breadth of)13 3642 2 720 6780 t
         1463 ( these ideas, and perhaps to)5 1117( hope is to encourage others to think about)8 1730( My)1 193(not the depth of the difficulties.)5 1280 4 720 6900 t
         1464 (apply them to old tools as well as new ones.)9 1760 1 720 7020 t
         1465 cleartomark
         1466 showpage
         1467 saveobj restore
         1468 %%EndPage: 6 6
         1469 %%Page: 7 7
         1470 /saveobj save def  % snapshot VM state; rolled back by `saveobj restore` at the end of this page
         1471 mark  % stack marker; the `cleartomark` below discards anything still above it
         1472 7 pagesetup  % pagesetup is not defined in this part of the file (presumably the prologue); it is passed the page number 7
         1473 10 R f  % R is bound to /Times-Roman in the font-name table at the top of the file; f presumably selects it at size 10 -- confirm in prologue
         1474 (\255 7 \255)2 166 1 2797 480 t  % centered page-number header; operand layout of t is fixed by the prologue (not visible here)
         1475 10 B f  % B is bound to /Times-Bold at the top of the file
         1476 (Acknowledgements)720 840 w  % section heading; w presumably shows one string at the given x y -- confirm in prologue
         1477 10 R f  % back to Times-Roman for the body paragraph
         1478 ( some of their ideas)4 806(John Linderman, Chris Van Wyk, Tom Duff and Norman Wilson will recognize)11 3264 2 970 996 t  % NOTE(review): fragments appear in reverse reading order (last phrase first) -- presumably t consumes them from the top of the stack
         1479 ( hope I have not misrepresented them.)6 1522( I)1 83(in these notes.)2 569 3 720 1116 t  % second body line, same reversed-fragment layout
         1480 cleartomark  % pop everything down to and including the mark pushed at the start of the page
         1481 showpage  % emit the finished page
         1482 saveobj restore  % restore the VM snapshot taken at the start of the page
         1483 %%EndPage: 7 7
         1484 %%Trailer
         1485 done  % procedure defined outside this part of the file (presumably the prologue's end-of-job hook) -- confirm
         1486 %%Pages: 7
         1487 %%DocumentFonts: Times-Roman Times-Bold Times-Italic Courier