se.ps - sam - An updated version of the sam text editor.
(HTM) git clone git://vernunftzentrum.de/sam.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
se.ps (41645B)
---
1 %!PS
2 %%Version: 3.3.2
3 %%DocumentFonts: (atend)
4 %%Pages: (atend)
5 %%EndComments
6 %
7 % Version 3.3.2 prologue for troff files.
8 %
9
% Default page parameters.  The setup section later in the file may redefine
% any of these before setup is called.
%   landscape      when true, setup adds 90 to orientation
%   linewidth      passed to setlinewidth by setup and rescaled by f
%   magnification, aspectratio
%                  x scale is magnification, y scale is magnification*aspectratio
%   margin         setup translates by (margin/2, -margin/2)
%   orientation, rotation
%                  setup rotates by orientation*rotation degrees
%   resolution     setup derives scaling = 72/resolution from it
%   xoffset, yoffset
%                  run through inch by setup, so they are expressed in inches
%   formsperpage   not referenced elsewhere in the visible part of this file
10 /#copies 1 store
11 /aspectratio 1 def
12 /formsperpage 1 def
13 /landscape false def
14 /linewidth .3 def
15 /magnification 1 def
16 /margin 0 def
17 /orientation 0 def
18 /resolution 720 def
19 /rotation 1 def
20 /xoffset 0 def
21 /yoffset 0 def
22
% Page size handling, consumed by pagedimensions:
%   useclippath    replace pagebbox with the bounding box of the clipping path
%   roundpage      additionally snap that box with roundpagebbox
%   pagebbox       fallback box [llx lly urx ury]; 612x792 is 8.5 by 11 inch
23 /roundpage true def
24 /useclippath true def
25 /pagebbox [0 0 612 792] def
26
% Font abbreviations mapped to PostScript font names.  Page code writes
% "size ABBREV f"; executing the abbreviation pushes the font name that f
% hands to findfont.
27 /R /Times-Roman def
28 /I /Times-Italic def
29 /B /Times-Bold def
30 /BI /Times-BoldItalic def
31 /H /Helvetica def
32 /HI /Helvetica-Oblique def
33 /HB /Helvetica-Bold def
34 /HX /Helvetica-BoldOblique def
% CW and CO both select plain Courier.
35 /CW /Courier def
36 /CO /Courier def
37 /CI /Courier-Oblique def
38 /CB /Courier-Bold def
39 /CX /Courier-BoldOblique def
40 /PA /Palatino-Roman def
41 /PI /Palatino-Italic def
42 /PB /Palatino-Bold def
43 /PX /Palatino-BoldItalic def
44 /Hr /Helvetica-Narrow def
45 /Hi /Helvetica-Narrow-Oblique def
46 /Hb /Helvetica-Narrow-Bold def
47 /Hx /Helvetica-Narrow-BoldOblique def
48 /KR /Bookman-Light def
49 /KI /Bookman-LightItalic def
50 /KB /Bookman-Demi def
51 /KX /Bookman-DemiItalic def
52 /AR /AvantGarde-Book def
53 /AI /AvantGarde-BookOblique def
54 /AB /AvantGarde-Demi def
55 /AX /AvantGarde-DemiOblique def
56 /NR /NewCenturySchlbk-Roman def
57 /NI /NewCenturySchlbk-Italic def
58 /NB /NewCenturySchlbk-Bold def
59 /NX /NewCenturySchlbk-BoldItalic def
60 /ZD /ZapfDingbats def
61 /ZI /ZapfChancery-MediumItalic def
% S and S1 name themselves: fonts called /S and /S1 are created by addmetrics
% (through cf) from Symbol and Times-Roman with adjusted metrics.
62 /S /S def
63 /S1 /S1 def
64 /GR /Symbol def
65
% inch:  n inch -> n*72        (inches to default user space units)
66 /inch {72 mul} bind def
% min:   a b min -> smaller of a and b
%        (if a > b the operands are swapped before the top one is dropped)
67 /min {2 copy gt {exch} if pop} bind def
68
% Wrap the three text operators in procedures of the same name.  Because the
% names now resolve to procedures rather than operators, later uses of bind
% leave them as name lookups instead of replacing them with the operators.
69 /show {show} bind def % so later references don't bind
70 /widthshow {widthshow} bind def
71 /stringwidth {stringwidth} bind def
72
% setup:  mark [name value ...] setup -
% Applies any name/value pairs left above the mark, then builds the page
% coordinate system from the page parameters and creates the adjusted fonts.
73 /setup {
% Define every pair found above the mark, then discard the mark itself.
74 counttomark 2 idiv {def} repeat pop
75
% Landscape is expressed as an extra 90 degrees of orientation.
76 landscape {/orientation 90 orientation add def} if
% scaling converts units of 1/resolution inch to 1/72 inch; it is applied by
% the last scale below and is also used by f.
77 /scaling 72 resolution div def
78 linewidth setlinewidth
79 1 setlinecap
80
% pagedimensions defines width, height, xcenter and ycenter.
81 pagedimensions
% Rotate about the centre of the page, then shift the origin by
% (-width/2, +height/2).
82 xcenter ycenter translate
83 orientation rotation mul rotate
84 width 2 div neg height 2 div translate
% Offsets are given in inches; the y offset is negated before use.
85 xoffset inch yoffset inch neg translate
86 margin 2 div dup neg translate
87 magnification dup aspectratio mul scale
88 scaling scaling scale
89
% Create the S and S1 fonts and leave the current point at the origin.
90 addmetrics
91 0 0 moveto
92 } def
93
% pagedimensions:  - pagedimensions -
% Defines width, height, xcenter and ycenter from pagebbox.  The first time it
% runs (userdict has no gotpagebbox entry) and useclippath is true, pagebbox is
% first replaced by the bounding box of the current clipping path.
94 /pagedimensions {
95 useclippath userdict /gotpagebbox known not and {
96 /pagebbox [clippath pathbbox newpath] def
% Snap the measured box only when asked to and when roundpagebbox is defined
% in the current dictionary.
97 roundpage currentdict /roundpagebbox known and {roundpagebbox} if
98 } if
% Stack after aload pop:  llx lly urx ury
99 pagebbox aload pop
% Reorder to  ury lly urx llx  and duplicate all four.
100 4 -1 roll exch 4 1 roll 4 copy
% In landscape the x and y pairs of the top copy are swapped, so width is
% computed from the y extent and height from the x extent.
101 landscape {4 2 roll} if
102 sub /width exch def
103 sub /height exch def
% The remaining copy gives the centre: (urx+llx)/2 and (ury+lly)/2.
104 add 2 div /xcenter exch def
105 add 2 div /ycenter exch def
% Record that the clipping path has been consulted.
106 userdict /gotpagebbox true put
107 } def
108
% addmetrics:  - addmetrics -
% Uses cf to build the two fonts with adjusted metrics:
%   /S   from Symbol, encoding unchanged (null), metrics taken from Sdefs
%   /S1  from Times-Roman, with a fresh copy of StandardEncoding, metrics
%        taken from S1defs
109 /addmetrics {
110 /Symbol /S null Sdefs cf
111 /Times-Roman /S1 StandardEncoding dup length array copy S1defs cf
112 } def
113
% pagesetup:  pagenumber pagesetup -
% Stores the page number in page.  If the current dictionary contains both a
% pagedict entry and an entry keyed by this page number, the value stored
% under the page number is looked up in pagedict and the result is executed.
114 /pagesetup {
115 /page exch def
116 currentdict /pagedict known currentdict page known and {
117 page load pagedict exch get cvx exec
118 } if
119 } def
120
% decodingdefs: the alternative bodies for the text procedure t; setdecoding
% installs one of them by index.
121 /decodingdefs [
% 0:  mark (str) x ... t      show each string at (x, y) using the stored y
122 {counttomark 2 idiv {y moveto show} repeat}
% 1:  mark (str) x ... y t    negate and store y first, then as entry 0
123 {neg /y exch def counttomark 2 idiv {y moveto show} repeat}
% 2:  (str) nspaces width ... count x y t
%     move to (x, -y); for each of the count groups, spread the difference
%     between width and the string's natural width evenly over its nspaces
%     space characters (code 32)
124 {neg moveto {2 index stringwidth pop sub exch div 0 32 4 -1 roll widthshow} repeat}
% 3:  (str) spacing ... count x y t
%     as entry 2, but each space is widened by spacing minus spacewidth
125 {neg moveto {spacewidth sub 0.0 32 4 -1 roll widthshow} repeat}
% 4:  identical to entry 0
126 {counttomark 2 idiv {y moveto show} repeat}
% 5:  negate the top operand and call setfunnytext, which is not defined in
%     the visible part of this file
127 {neg setfunnytext}
128 ] def
129
% setdecoding:  index setdecoding -
% Binds entry index of decodingdefs and defines it as the text procedure t.
130 /setdecoding {/t decodingdefs 3 -1 roll get bind def} bind def
131
% w:     (str) x y w -     show str starting at (x, -y)
132 /w {neg moveto show} bind def
% m:     x y m -           move to (x, -y) and remember -y in y
133 /m {neg dup /y exch def moveto} bind def
% done:  - done -          execute lastpage if that name is defined anywhere
%                          on the dictionary stack
134 /done {/lastpage where {pop lastpage} if} def
135
% f:  ptsize fontname f -
% Selects fontname at ptsize.  Side effects: defines font, ptsize, size
% (ptsize/scaling) and spacewidth, and resets the line width.
136 /f {
137 dup /font exch def findfont exch
% Dividing by scaling compensates for the scale applied in setup.
138 dup /ptsize exch def scaling div dup /size exch def scalefont setfont
% Line width follows the point size: linewidth*ptsize/(10*scaling).
139 linewidth ptsize mul scaling 10 mul div setlinewidth
% Width of a space in the new font; used by decodingdefs entry 3.
140 /spacewidth ( ) stringwidth pop def
141 } bind def
142
% changefont:  slant height changefont -
% Replaces the current font with a transformed copy of itself.  The matrix
% keeps the x scale at 1, scales y by height/ptsize and shears x by
% (height/ptsize)*tan(slant), with slant given in degrees.
143 /changefont {
144 /fontheight exch def
145 /fontslant exch def
146 currentfont [
147 1 0
% tan(slant) is computed as sin/cos.
148 fontheight ptsize div fontslant sin mul fontslant cos div
149 fontheight ptsize div
150 0 0
151 ] makefont setfont
152 } bind def
153
% sf:  ptsize fontname sf -     same operands and effect as f, which it calls
154 /sf {f} bind def
155
% cf:  basefont newfont encoding chtab cf -
% Defines newfont as a copy of basefont with a Metrics dictionary built from
% chtab, an array of alternating character names and metric arrays.  If
% encoding is an array it replaces the copy's Encoding; any other value (for
% example null) leaves the encoding alone.
% Side effects: defines entries, chtab, newencoding, newfont and newdict in
% the current dictionary.
156 /cf {
% chtab holds name/array pairs, so entries is half its length.
157 dup length 2 idiv
158 /entries exch def
159 /chtab exch def
160 /newencoding exch def
161 /newfont exch def
162
% Copy every entry of the base font except FID; the copy has one extra slot,
% which leaves room for the Metrics entry added below.
163 findfont dup length 1 add dict
164 /newdict exch def
165 {1 index /FID ne {newdict 3 1 roll put}{pop pop} ifelse} forall
166
167 newencoding type /arraytype eq {newdict /Encoding newencoding put} if
168
% Fill Metrics: push all pairs of chtab and def them one at a time while the
% Metrics dictionary is the current dictionary.
169 newdict /Metrics entries dict put
170 newdict /Metrics get
171 begin
172 chtab aload pop
173 1 1 entries {pop def} for
174 newfont newdict definefont pop
175 end
176 } bind def
177
178 %
179 % A few arrays used to adjust reference points and character widths in some
180 % of the printer resident fonts. If square roots are too high try changing
181 % the lines describing /radical and /radicalex to,
182 %
183 % /radical [0 -75 550 0]
184 % /radicalex [-50 -75 500 0]
185 %
186 % Move braceleftbt a bit - default PostScript character is off a bit.
187 %
188
% Sdefs: character name / metrics array pairs that addmetrics passes to cf to
% build font S from Symbol.  Each array becomes that character's entry in the
% font's Metrics dictionary; entries have either two or four numbers.
189 /Sdefs [
190 /bracketlefttp [201 500]
191 /bracketleftbt [201 500]
192 /bracketrighttp [-81 380]
193 /bracketrightbt [-83 380]
194 /braceleftbt [203 490]
195 /bracketrightex [220 -125 500 0]
196 /radical [0 0 550 0]
197 /radicalex [-50 0 500 0]
198 /parenleftex [-20 -170 0 0]
199 /integral [100 -50 500 0]
200 /infinity [10 -75 730 0]
201 ] def
202
% S1defs: character name / metrics array pairs that addmetrics passes to cf
% to build font S1 from Times-Roman.
203 /S1defs [
204 /underscore [0 80 500 0]
205 /endash [7 90 650 0]
206 ] def
207 %
208 % Tries to round clipping path dimensions, as stored in array pagebbox, so they
209 % match one of the known sizes in the papersizes array. Lower left coordinates
210 % are always set to 0.
211 %
212
% roundpagebbox:  - roundpagebbox -
% Modifies the pagebbox array in place: the lower left corner becomes 0 0 and
% each upper right coordinate is snapped to the nearest known paper dimension
% when it lies within half an inch of one.
213 /roundpagebbox {
% Scratch dictionary with room for exactly the seven names used below:
% papersizes, mappapersize, val, slop, diff, j and i.
214 7 dict begin
215 /papersizes [8.5 inch 11 inch 14 inch 17 inch] def
216
% mappapersize:  val mappapersize -> val or the closest papersizes entry
217 /mappapersize {
218 /val exch def
219 /slop .5 inch def
220 /diff slop def
221 /j 0 def
% Track the smallest distance seen so far; on a tie the later entry wins.
222 0 1 papersizes length 1 sub {
223 /i exch def
224 papersizes i get val sub abs
225 dup diff le {/diff exch def /j i def} {pop} ifelse
226 } for
% Only distances strictly below slop count as a match.
227 diff slop lt {papersizes j get} {val} ifelse
228 } def
229
230 pagebbox 0 0 put
231 pagebbox 1 0 put
232 pagebbox dup 2 get mappapersize 2 exch put
233 pagebbox dup 3 get mappapersize 3 exch put
234 end
235 } bind def
236
237 %%EndProlog
238 %%BeginSetup
% Setup section.  The mark pushed here is the one setup removes with
% counttomark ... pop.  The definitions below are executed immediately and
% replace the prologue defaults (for example linewidth .3 becomes 0.5).
239 mark
240 /linewidth 0.5 def
241 /#copies 1 store
242 /landscape false def
243 /resolution 720 def
244 %
245 % Encoding vector and redefinition of findfont for the ISO Latin1 standard.
246 % The 18 characters missing from ROM based fonts on older printers are noted
247 % below.
248 %
249
% ISOLatin1Encoding: 256 entry encoding vector installed by the findfont
% replacement below on every font whose Encoding is StandardEncoding.
250 /ISOLatin1Encoding [
% Codes 0-31: undefined.
251 /.notdef
252 /.notdef
253 /.notdef
254 /.notdef
255 /.notdef
256 /.notdef
257 /.notdef
258 /.notdef
259 /.notdef
260 /.notdef
261 /.notdef
262 /.notdef
263 /.notdef
264 /.notdef
265 /.notdef
266 /.notdef
267 /.notdef
268 /.notdef
269 /.notdef
270 /.notdef
271 /.notdef
272 /.notdef
273 /.notdef
274 /.notdef
275 /.notdef
276 /.notdef
277 /.notdef
278 /.notdef
279 /.notdef
280 /.notdef
281 /.notdef
282 /.notdef
% Codes 32-126: space through asciitilde.  Code 45 is /minus; /hyphen is at
% code 173 further down.
283 /space
284 /exclam
285 /quotedbl
286 /numbersign
287 /dollar
288 /percent
289 /ampersand
290 /quoteright
291 /parenleft
292 /parenright
293 /asterisk
294 /plus
295 /comma
296 /minus
297 /period
298 /slash
299 /zero
300 /one
301 /two
302 /three
303 /four
304 /five
305 /six
306 /seven
307 /eight
308 /nine
309 /colon
310 /semicolon
311 /less
312 /equal
313 /greater
314 /question
315 /at
316 /A
317 /B
318 /C
319 /D
320 /E
321 /F
322 /G
323 /H
324 /I
325 /J
326 /K
327 /L
328 /M
329 /N
330 /O
331 /P
332 /Q
333 /R
334 /S
335 /T
336 /U
337 /V
338 /W
339 /X
340 /Y
341 /Z
342 /bracketleft
343 /backslash
344 /bracketright
345 /asciicircum
346 /underscore
347 /quoteleft
348 /a
349 /b
350 /c
351 /d
352 /e
353 /f
354 /g
355 /h
356 /i
357 /j
358 /k
359 /l
360 /m
361 /n
362 /o
363 /p
364 /q
365 /r
366 /s
367 /t
368 /u
369 /v
370 /w
371 /x
372 /y
373 /z
374 /braceleft
375 /bar
376 /braceright
377 /asciitilde
% Codes 127-143: undefined.
378 /.notdef
379 /.notdef
380 /.notdef
381 /.notdef
382 /.notdef
383 /.notdef
384 /.notdef
385 /.notdef
386 /.notdef
387 /.notdef
388 /.notdef
389 /.notdef
390 /.notdef
391 /.notdef
392 /.notdef
393 /.notdef
394 /.notdef
% Codes 144-159: dotlessi and the accent glyphs (153 and 156 undefined).
395 /dotlessi
396 /grave
397 /acute
398 /circumflex
399 /tilde
400 /macron
401 /breve
402 /dotaccent
403 /dieresis
404 /.notdef
405 /ring
406 /cedilla
407 /.notdef
408 /hungarumlaut
409 /ogonek
410 /caron
% Codes 160-255: the upper half of the Latin1 set.  Entries marked "missing"
% are the 18 characters mentioned in the comment that precedes this array.
411 /space
412 /exclamdown
413 /cent
414 /sterling
415 /currency
416 /yen
417 /brokenbar % missing
418 /section
419 /dieresis
420 /copyright
421 /ordfeminine
422 /guillemotleft
423 /logicalnot
424 /hyphen
425 /registered
426 /macron
427 /degree % missing
428 /plusminus % missing
429 /twosuperior % missing
430 /threesuperior % missing
431 /acute
432 /mu % missing
433 /paragraph
434 /periodcentered
435 /cedilla
436 /onesuperior % missing
437 /ordmasculine
438 /guillemotright
439 /onequarter % missing
440 /onehalf % missing
441 /threequarters % missing
442 /questiondown
443 /Agrave
444 /Aacute
445 /Acircumflex
446 /Atilde
447 /Adieresis
448 /Aring
449 /AE
450 /Ccedilla
451 /Egrave
452 /Eacute
453 /Ecircumflex
454 /Edieresis
455 /Igrave
456 /Iacute
457 /Icircumflex
458 /Idieresis
459 /Eth % missing
460 /Ntilde
461 /Ograve
462 /Oacute
463 /Ocircumflex
464 /Otilde
465 /Odieresis
466 /multiply % missing
467 /Oslash
468 /Ugrave
469 /Uacute
470 /Ucircumflex
471 /Udieresis
472 /Yacute % missing
473 /Thorn % missing
474 /germandbls
475 /agrave
476 /aacute
477 /acircumflex
478 /atilde
479 /adieresis
480 /aring
481 /ae
482 /ccedilla
483 /egrave
484 /eacute
485 /ecircumflex
486 /edieresis
487 /igrave
488 /iacute
489 /icircumflex
490 /idieresis
491 /eth % missing
492 /ntilde
493 /ograve
494 /oacute
495 /ocircumflex
496 /otilde
497 /odieresis
498 /divide % missing
499 /oslash
500 /ugrave
501 /uacute
502 /ucircumflex
503 /udieresis
504 /yacute % missing
505 /thorn % missing
506 /ydieresis
507 ] def
508
% NewFontDirectory: cache used by the findfont replacement, keyed by font
% name; created with the same capacity as FontDirectory.
509 /NewFontDirectory FontDirectory maxlength dict def
510
511 %
512 % Apparently no guarantee findfont is defined in systemdict so the obvious
513 %
514 % systemdict /findfont get exec
515 %
516 % can generate an error. So far the only exception is a VT600 (version 48.0).
517 %
518
% Save the original findfont in userdict as @RealFindfont, once only.  The
% value is fetched with load while systemdict is on top of the dictionary
% stack, so it is found whether or not findfont lives in systemdict itself.
519 userdict /@RealFindfont known not {
520 userdict begin
521 /@RealFindfont systemdict begin /findfont load end def
522 end
523 } if
524
% findfont:  fontname findfont -> font
% Replacement for findfont that caches its results in NewFontDirectory.  On
% the first request for a name the real findfont is called; if the font it
% returns uses StandardEncoding, a copy re-encoded with ISOLatin1Encoding is
% cached instead.  Other fonts are cached unchanged.
525 /findfont {
526 dup NewFontDirectory exch known not {
% Keep two extra copies of the name: one is the cache key for the put below,
% the other is consumed by the real findfont.
527 dup
528 %dup systemdict /findfont get exec % not always in systemdict
529 dup userdict /@RealFindfont get exec
530 dup /Encoding get StandardEncoding eq {
% Copy everything except FID into a new dictionary and swap the encoding.
531 dup length dict begin
532 {1 index /FID ne {def}{pop pop} ifelse} forall
533 /Encoding ISOLatin1Encoding def
534 currentdict
535 end
% Every re-encoded copy is registered under the same name, DummyFontName;
% callers receive it through the NewFontDirectory cache.
536 /DummyFontName exch definefont
537 } if
538 NewFontDirectory 3 1 roll put
539 } if
540 NewFontDirectory exch get
541 } bind def
542
% Consume the mark pushed at the start of the setup section and establish
% the page coordinate system.
543 setup
% Select entry 2 of decodingdefs as the text procedure t.
544 2 setdecoding
545 %%EndSetup
546 %%Page: 1 1
547 /saveobj save def
548 mark
549 1 pagesetup
550 12 B f
551 (Structural Regular Expressions)2 1622 1 2069 1230 t
552 10 I f
553 (Rob Pike)1 363 1 2698 1470 t
554 10 R f
555 (AT&T Bell Laboratories)2 993 1 2383 1650 t
556 (Murray Hill, New Jersey 07974)4 1267 1 2246 1770 t
557 10 I f
558 (ABSTRACT)2643 2150 w
559 10 R f
560 (The current)1 465 1 1330 2410 t
561 9 R f
562 (UNIX)1821 2410 w
563 10 R f
564 ( the built\255in concept of a)5 999(\256 text processing tools are weakened by)6 1635 2 2046 2410 t
565 ( describe the `shape' of files when the typical)8 1908( is a simple notation that can)6 1222(line. There)1 470 3 1080 2530 t
566 ( regular)1 316( Using)1 298( is regular expressions.)3 942( notation)1 361( That)1 241(array\255of\255lines picture is inadequate.)3 1442 6 1080 2650 t
567 ( files has interesting)3 841(expressions to describe the structure in addition to the contents of)10 2759 2 1080 2770 t
568 (applications, and yields elegant methods for dealing with some problems the current tools)12 3600 1 1080 2890 t
569 ( are composed, the result is)5 1157( operations using these expressions)4 1464( When)1 303(handle clumsily.)1 676 4 1080 3010 t
570 (reminiscent of shell pipelines.)3 1199 1 1080 3130 t
571 10 B f
572 (The Peter\255On\255Silicon Problem)2 1299 1 720 3490 t
573 10 R f
574 ( model,)1 301(In the traditional)2 666 2 970 3646 t
575 9 R f
576 (UNIX)1961 3646 w
577 10 R f
578 (text files are arrays of lines, and all the familiar tools)10 2120 1 2212 3646 t
579 10 S1 f
580 (\320)4358 3646 w
581 10 CW f
582 (grep)4484 3646 w
583 10 R f
584 (,)4724 3646 w
585 10 CW f
586 (sort)4775 3646 w
587 10 R f
588 (,)5015 3646 w
589 10 CW f
590 (awk)720 3766 w
591 10 R f
592 (, etc.)1 197 1 900 3766 t
593 10 S1 f
594 (\320)1128 3766 w
595 10 R f
596 ( of)1 113( output)1 287( The)1 211(expect arrays of lines as input.)5 1244 4 1259 3766 t
597 10 CW f
598 (ls)3144 3766 w
599 10 R f
600 (\(regardless of options\) is a list of files, one)8 1746 1 3294 3766 t
601 (per line, that may be selected by tools such as)9 1825 1 720 3886 t
602 10 CW f
603 (grep)2570 3886 w
604 10 R f
605 (:)2810 3886 w
606 10 CW f
607 (ls \255l /usr/ken/bin | grep 'rws.*root')5 2220 1 1080 4066 t
608 10 R f
609 (\(I assume that the reader is familiar with the)8 1803 1 720 4246 t
610 9 R f
611 (UNIX)2551 4246 w
612 10 R f
613 ( model is powerful, but it is also pervasive,)8 1769(tools.\) The)1 464 2 2807 4246 t
614 ( Many)1 298(sometimes overly so.)2 877 2 720 4366 t
615 9 R f
616 (UNIX)1933 4366 w
617 10 R f
618 ( more general, and more useful, if they could be)9 2041(programs would be)2 801 2 2198 4366 t
619 ( example,)1 400( For)1 201(applied to arbitrarily structured input.)4 1549 3 720 4486 t
620 10 CW f
621 (diff)2907 4486 w
622 10 R f
623 ( C)1 105(could in principle report differences at the)6 1751 2 3184 4486 t
624 ( if the interesting quantum of information isn't a line, most of)11 2537( But)1 202( level.)1 251(function level instead of the line)5 1330 4 720 4606 t
625 (the tools \(including)2 804 1 720 4726 t
626 10 CW f
627 (diff)1562 4726 w
628 10 R f
629 ( solution so the line\255)4 873( perverting the)2 608( Worse,)1 348(\) don't help, or at best do poorly.)7 1409 4 1802 4726 t
630 (oriented tools can implement it often obscures the original problem.)9 2714 1 720 4846 t
631 ( consider the problem of turning)5 1320(To see how a line oriented view of text can introduce complication,)11 2750 2 970 5002 t
632 ( input is an array of blank and non\255blank characters, like this:)11 2451( The)1 205(Peter into silicon.)2 703 3 720 5122 t
633 10 CW f
634 (#######)1320 5252 w
635 (#########)1260 5322 w
636 (#### #####)1 660 1 1200 5392 t
637 ( #)1 180(#### ####)1 720 2 1140 5462 t
638 (#### #####)1 840 1 1140 5532 t
639 (#### ###)1 840 1 1080 5602 t
640 (######## #####)1 960 1 1080 5672 t
641 (#### #########)1 840 1 1080 5742 t
642 ( ####)1 300( #)1 180(#### #)1 360 3 1080 5812 t
643 ( ##)1 300( ###)1 300(## #)1 240 3 1080 5882 t
644 ( ###)1 300(### #)1 480 2 1080 5952 t
645 (### ##)1 540 1 1080 6022 t
646 (## #)1 360 1 1140 6092 t
647 (# ####)1 480 1 1200 6162 t
648 (# #)1 180 1 1200 6232 t
649 (## # ##)2 660 1 1080 6302 t
650 10 R f
651 (The output is to be statements in a language for laying out integrated circuits:)13 3094 1 720 6482 t
652 10 CW f
653 (rect minx miny maxx maxy)4 1440 1 1080 6662 t
654 10 R f
655 ( simplify the problem slightly,)4 1247( To)1 169(The statements encode where the non\255blank characters are in the input.)10 2904 3 720 6842 t
656 (the coordinate system has)3 1032 1 720 6962 t
657 10 I f
658 (x)1778 6962 w
659 10 R f
660 (positive to the right and)4 954 1 1848 6962 t
661 10 I f
662 (y)2828 6962 w
663 10 R f
664 ( output need not be efficient in its)7 1346( The)1 206(positive down.)1 590 3 2898 6962 t
665 (use of rectangles.)2 723 1 720 7082 t
666 10 CW f
667 (Awk)1507 7082 w
668 10 R f
669 ( which is a mixture of text processing and)8 1790(is the obvious language for the task,)6 1524 2 1726 7082 t
670 ( the input is an array of lines, as)8 1345( Since)1 281(geometry, hence arithmetic.)2 1132 3 720 7202 t
671 10 CW f
672 (awk)3511 7202 w
673 10 R f
674 (expects, the job should be fairly)5 1316 1 3724 7202 t
675 cleartomark
676 showpage
677 saveobj restore
678 %%EndPage: 1 1
679 %%Page: 2 2
680 /saveobj save def
681 mark
682 2 pagesetup
683 10 R f
684 (\255 2 \255)2 166 1 2797 480 t
685 ( is an)2 211( Here)1 243(easy, and in fact it is.)5 846 3 720 840 t
686 10 CW f
687 (awk)2045 840 w
688 10 R f
689 (program for the job:)3 807 1 2250 840 t
690 10 CW f
691 (BEGIN{)1080 1020 w
692 (y=1)1330 1140 w
693 (})1080 1260 w
694 (/^/{)1080 1380 w
695 (for\(x=1; x<=length\($0\); x++\))2 1680 1 1330 1500 t
696 (if\(substr\($0, x, 1\)=="#"\))2 1500 1 1580 1620 t
697 (print "rect", x, y, x+1, y+1)5 1680 1 1830 1740 t
698 (y++)1330 1860 w
699 (})1080 1980 w
700 10 R f
701 (Although it is certainly easy to write, there is something odd about this program: the line\255driven nature of)17 4320 1 720 2160 t
702 10 CW f
703 (awk)720 2280 w
704 10 R f
705 (results in only one obvious advantage)5 1512 1 926 2280 t
706 10 S1 f
707 (\320)2464 2280 w
708 10 R f
709 (the ease of tracking)3 781 1 2590 2280 t
710 10 CW f
711 (y)3397 2280 w
712 10 R f
713 ( breaking out the pieces of)5 1056( task of)2 296(. The)1 231 3 3457 2280 t
714 ( simple procedural code that does not use any advanced technology such as)12 3077(the line is left to explicit code,)6 1243 2 720 2400 t
715 ( peculiarity becomes more evident if the problem is)8 2234( This)1 250( manipulation.)1 600(regular expressions for string)3 1236 4 720 2520 t
716 (rephrased to demand that each horizontal run of rectangles be folded into a single rectangle:)14 3669 1 720 2640 t
717 10 CW f
718 (BEGIN{)1080 2820 w
719 (y=1)1330 2940 w
720 (})1080 3060 w
721 (/^/{)1080 3180 w
722 (for\(x=1; x<=length\($0\); x++\))2 1680 1 1330 3300 t
723 (if\(substr\($0, x, 1\)=="#"\){)2 1560 1 1580 3420 t
724 (x0=x;)1830 3540 w
725 (while\(++x<=length\($0\) && substr\($0, x, 1\)=="#"\))4 2820 1 1830 3660 t
726 (;)2080 3780 w
727 (print "rect", x0, y, x, y+1)5 1620 1 1830 3900 t
728 (})1580 4020 w
729 (y++)1330 4140 w
730 (})1080 4260 w
731 10 R f
732 ( In)1 133(Here a considerable amount of code is being spent to do a job a regular expression could do very simply.)19 4187 2 720 4440 t
733 (fact, the only regular expression in the program is)8 2044 1 720 4560 t
734 10 CW f
735 (^)2796 4560 w
736 10 R f
737 ( ver\255)1 191( \(Newer)1 354( input.)1 262(, which is almost irrelevant to the)6 1377 4 2856 4560 t
738 (sions of)1 324 1 720 4680 t
739 10 CW f
740 (awk)1079 4680 w
741 10 R f
742 (have mechanisms to use regular expressions within actions, but even there the relationship)12 3746 1 1294 4680 t
743 (between the patterns that match text and the actions that manipulate the text is still too weak.\))16 3743 1 720 4800 t
744 10 CW f
745 (Awk's)970 4956 w
746 10 R f
747 (patterns)1302 4956 w
748 10 S1 f
749 (\320)1650 4956 w
750 10 R f
751 ( in slashes)2 427(the text)1 304 2 1782 4956 t
752 10 CW f
753 (//)2546 4956 w
754 10 R f
755 (that select the input on which to run the actions, the pro\255)11 2341 1 2699 4956 t
756 ( braces)1 280(grams in the)2 498 2 720 5076 t
757 10 CW f
758 ({})1524 5076 w
759 10 S1 f
760 (\320)1670 5076 w
761 10 R f
762 ( But)1 196(pass to the actions the entire line containing the text matched by the pattern.)13 3048 2 1796 5076 t
763 ( that)1 176( Imagine)1 378( can only be a line.)5 759(much of the power of this idea is being wasted, since the matched text)13 2801 4 720 5196 t
764 10 CW f
765 (awk)4860 5196 w
766 10 R f
767 ( so the patterns instead passed precisely the text they matched, with no implicit line bound\255)15 3761(were changed)1 559 2 720 5316 t
768 ( first program could then be written:)6 1448(aries. Our)1 418 2 720 5436 t
769 cleartomark
770 showpage
771 saveobj restore
772 %%EndPage: 2 2
773 %%Page: 3 3
774 /saveobj save def
775 mark
776 3 pagesetup
777 10 R f
778 (\255 3 \255)2 166 1 2797 480 t
779 10 CW f
780 (BEGIN{)1080 900 w
781 (x=1)1330 1020 w
782 (y=1)1330 1140 w
783 (})1080 1260 w
784 (/ /{)1 240 1 1080 1380 t
785 (x++)1330 1500 w
786 (})1080 1620 w
787 (/#/{)1080 1740 w
788 (print "rect", x, x+1, y, y+1)5 1680 1 1330 1860 t
789 (x++)1330 1980 w
790 (})1080 2100 w
791 (/\\n/{)1080 2220 w
792 (x=1)1330 2340 w
793 (y++)1330 2460 w
794 (})1080 2580 w
795 10 R f
796 ( regular expressions to break out complete strings of blanks and)10 2606(and the second version could use)5 1342 2 720 2760 t
797 10 CW f
798 (#)4699 2760 w
799 10 R f
800 ('s sim\255)1 281 1 4759 2760 t
801 (ply:)720 2880 w
802 10 CW f
803 (BEGIN{)1080 3060 w
804 (x=1)1330 3180 w
805 (y=1)1330 3300 w
806 (})1080 3420 w
807 (/ +/{)1 300 1 1080 3540 t
808 (x+=length\($0\))1330 3660 w
809 (})1080 3780 w
810 (/#+/{)1080 3900 w
811 (print "rect", x, x+length\($0\), y, y+1)5 2220 1 1330 4020 t
812 (x+=length\($0\))1330 4140 w
813 (})1080 4260 w
814 (/\\n/{)1080 4380 w
815 (x=1)1330 4500 w
816 (y++)1330 4620 w
817 (})1080 4740 w
818 10 R f
819 ( are)1 148(In these programs, regular expressions are being used to do more than just select the input, the way they)18 4172 2 720 4920 t
820 (used in all the traditional)4 1050 1 720 5040 t
821 9 R f
822 (UNIX)1806 5040 w
823 10 R f
824 ( the expressions are doing a simple parsing \(or at least a)11 2375(tools. Instead,)1 596 2 2069 5040 t
825 ( expressions are called)3 900( Such)1 250(breaking into lexical tokens\) of the input.)6 1651 3 720 5160 t
826 10 I f
827 (structural regular expressions)2 1213 1 3547 5160 t
828 10 R f
829 (or just)1 254 1 4786 5160 t
830 10 I f
831 (structural expressions.)1 911 1 720 5280 t
832 10 R f
833 ( notably shorter than the originals, but they are conceptually simpler, because)11 3125(These programs are not)3 945 2 970 5436 t
834 ( The)1 208(the structure of the input is expressed in the structure of the programs, rather than in procedural code.)17 4112 2 720 5556 t
835 ( between the patterns and the actions: the patterns select portions of the input)13 3102(labor has been cleanly divided)4 1218 2 720 5676 t
836 ( actions contain no code to disassemble the input.)8 1979( The)1 205(while the actions operate on them.)5 1370 3 720 5796 t
837 (The lexical analysis generator)3 1233 1 970 5952 t
838 10 CW f
839 (lex)2241 5952 w
840 10 R f
841 ( but its)2 301(uses regular expressions to define the structure of text,)8 2280 2 2459 5952 t
842 ( \(its output must be run through the C)8 1594(implementation is poor, and since it is not an interactive program)10 2726 2 720 6072 t
843 ( conve\255)1 302( even ignoring issues of speed and)6 1400( But)1 200(compiler\) it has largely been forgotten as a day\255to\255day tool.)9 2418 4 720 6192 t
844 (nience,)720 6312 w
845 10 CW f
846 (lex)1041 6312 w
847 10 R f
848 ( the next)2 364( As)1 171( structural expressions.)2 938(still misses out on one of the most important aspects of)10 2310 4 1257 6312 t
849 ( be nested to describe the structure of a file recursively, with)11 2510(section illustrates, structural expressions can)4 1810 2 720 6432 t
850 (surprising results.)1 711 1 720 6552 t
851 10 B f
852 (Interactive Text Editing)2 1027 1 720 6792 t
853 10 R f
854 (It is ironic that)3 589 1 970 6948 t
855 9 R f
856 (UNIX)1583 6948 w
857 10 R f
858 ( typ\255)1 188(files are uninterpreted byte streams, yet the style of programming that most)11 3018 2 1834 6948 t
859 (ifies)720 7068 w
860 9 R f
861 (UNIX)925 7068 w
862 10 R f
863 ( imposed on files)3 713(has a fairly rigid structure)4 1071 2 1185 7068 t
864 10 S1 f
865 (\320)3003 7068 w
866 10 R f
867 ( silent limits)2 514( \(The)1 247(arrays of not\255too\255long lines.)3 1142 3 3137 7068 t
868 ( the)1 153( Although)1 434( line lengths by most tools can be frustrating.\))8 1883(placed on)1 390 4 720 7188 t
869 10 CW f
870 (awk)3611 7188 w
871 10 R f
872 (variant introduced above does)3 1218 1 3822 7188 t
873 (not exist, there is an interactive text editor,)7 1706 1 720 7308 t
874 10 CW f
875 (sam)2451 7308 w
876 10 R f
877 (, that treats its files as simple byte streams.)8 1710 1 2631 7308 t
878 cleartomark
879 showpage
880 saveobj restore
881 %%EndPage: 3 3
882 %%Page: 4 4
883 /saveobj save def
884 mark
885 4 pagesetup
886 10 R f
887 (\255 4 \255)2 166 1 2797 480 t
888 (The)970 840 w
889 10 CW f
890 (sam)1153 840 w
891 10 R f
892 (command language looks much like that of)6 1744 1 1361 840 t
893 10 CW f
894 (ed)3133 840 w
895 10 R f
896 (, but the details are different because)6 1483 1 3253 840 t
897 10 CW f
898 (sam)4764 840 w
899 10 R f
900 (is)4973 840 w
901 ( example, the simple address)4 1151( For)1 189(not line\255oriented.)1 688 3 720 960 t
902 10 CW f
903 (/string/)1080 1140 w
904 10 R f
905 ( there are short\255)3 646( Although)1 434( not the next line containing ``string''.)6 1565(matches the next occurrence of ``string'',)5 1675 4 720 1320 t
906 (hands to simplify common actions, the idea of a line must be stated explicitly in)14 3196 1 720 1440 t
907 10 CW f
908 (sam)3941 1440 w
909 10 R f
910 (.)4121 1440 w
911 10 CW f
912 (Sam)970 1596 w
913 10 R f
914 (has the same simple text addition and modification commands)8 2509 1 1177 1596 t
915 10 CW f
916 (ed)3713 1596 w
917 10 R f
918 (has:)3860 1596 w
919 10 CW f
920 (a)4048 1596 w
921 10 R f
922 (adds text after the cur\255)4 905 1 4135 1596 t
923 (rent location,)1 527 1 720 1716 t
924 10 CW f
925 (i)1272 1716 w
926 10 R f
927 (adds text before it,)3 743 1 1357 1716 t
928 10 CW f
929 (d)2125 1716 w
930 10 R f
931 (deletes it, and)2 552 1 2210 1716 t
932 10 CW f
933 (c)2787 1716 w
934 10 R f
935 (replaces it.)1 432 1 2872 1716 t
936 (Unlike in)1 376 1 970 1872 t
937 10 CW f
938 (ed)1372 1872 w
939 10 R f
940 (, the current location in)4 933 1 1492 1872 t
941 10 CW f
942 (sam)2451 1872 w
943 10 R f
944 ( simplifies some)2 660( This)1 230( \(and usually isn't\) a line.)5 1031(need not be)2 462 4 2657 1872 t
945 ( example,)1 397( For)1 198(operations considerably.)1 985 3 720 1992 t
946 10 CW f
947 (ed)2334 1992 w
948 10 R f
949 ( a file.)2 268(has several ways to delete all occurrences of a string in)10 2284 2 2488 1992 t
950 (One method is)2 583 1 720 2112 t
951 10 CW f
952 (g/string/ s///g)1 900 1 1080 2292 t
953 10 R f
954 ( substitute command is used to delete text within a line, while a delete command is)15 3369(It is symptomatic that a)4 951 2 720 2472 t
955 ( deleted contains a newline, this technique doesn't work.)8 2271( if the string to be)5 718( Also,)1 266(used to delete whole lines.)4 1065 4 720 2592 t
956 ( just an array of characters, but some characters are more equal than others.\))13 3310(\(A file is)2 395 2 720 2712 t
957 10 CW f
958 (Sam)4496 2712 w
959 10 R f
960 (is more)1 318 1 4722 2712 t
961 (forthright:)720 2832 w
962 10 CW f
963 (x/string/d)1080 3012 w
964 10 R f
965 (The)720 3192 w
966 10 CW f
967 (x)905 3192 w
968 10 R f
969 ( runs the subsequent command)4 1256(\(`extract'\) command searches for each occurrence of the pattern, and)9 2789 2 995 3192 t
970 ( that this is subtly different)5 1075( Note)1 244( \(not to the line containing the match\).)7 1532(with the current text set to the match)7 1469 4 720 3312 t
971 (from)720 3432 w
972 10 CW f
973 (ed)940 3432 w
974 10 R f
975 ('s)1060 3432 w
976 10 CW f
977 (g)1159 3432 w
978 10 R f
979 (command:)1246 3432 w
980 10 CW f
981 (x)1695 3432 w
982 10 R f
983 (extracts the complete text for the command,)6 1767 1 1782 3432 t
984 10 CW f
985 (g)3576 3432 w
986 10 R f
987 ( is also)2 282( There)1 284(merely selects lines.)2 811 3 3663 3432 t
988 (a complement to)2 666 1 720 3552 t
989 10 CW f
990 (x)1411 3552 w
991 10 R f
992 (, called)1 288 1 1471 3552 t
993 10 CW f
994 (y)1784 3552 w
995 10 R f
996 (, that extracts the pieces)4 956 1 1844 3552 t
997 10 I f
998 (between)2825 3552 w
999 10 R f
1000 (the matches of the pattern.)4 1056 1 3177 3552 t
1001 (The)970 3708 w
1002 10 CW f
1003 (x)1151 3708 w
1004 10 R f
1005 (command is a loop, and)4 956 1 1237 3708 t
1006 10 CW f
1007 (sam)2220 3708 w
1008 10 R f
1009 (has a corresponding conditional command, called)5 1990 1 2427 3708 t
1010 10 CW f
1011 (g)4444 3708 w
1012 10 R f
1013 (\(unrelated to)1 509 1 4531 3708 t
1014 10 CW f
1015 (ed)720 3828 w
1016 10 R f
1017 ('s)840 3828 w
1018 10 CW f
1019 (g)937 3828 w
1020 10 R f
1021 (\):)997 3828 w
1022 10 CW f
1023 (g/pattern/command)1080 4008 w
1024 10 R f
1025 ( that it does not loop, and it does not change)10 1783( Note)1 246( matches the pattern.)3 832(runs the command if the current text)6 1459 4 720 4188 t
1026 ( lines con\255)2 424( the command to print all)5 1033( Hence)1 309(the current text; it merely selects whether a command will run.)10 2554 4 720 4308 t
1027 (taining a string is)3 692 1 720 4428 t
1028 10 CW f
1029 (x/.*\\n/ g/string/p)1 1080 1 1080 4608 t
1030 10 S1 f
1031 (\320)720 4788 w
1032 10 R f
1033 ( reverse conditional is)3 891( The)1 209( contains the string.)3 795(extract all the lines, and print each one that)8 1740 4 848 4788 t
1034 10 CW f
1035 (v)4512 4788 w
1036 10 R f
1037 (, so to print)3 468 1 4572 4788 t
1038 (all lines containing `rob' but not `robot':)6 1621 1 720 4908 t
1039 10 CW f
1040 (x/.*\\n/ g/rob/ v/robot/p)2 1440 1 1080 5088 t
1041 10 R f
1042 (A more dramatic example is to capitalize all occurrences of words `i':)11 2790 1 720 5268 t
1043 10 CW f
1044 (x/[A\255Za\255z]+/ g/i/ v/../ c/I/)3 1680 1 1080 5448 t
1045 10 S1 f
1046 (\320)720 5628 w
1047 10 R f
1048 ( more characters, and change the)5 1316(extract all the words, find those that contain `i', reject those with two or)13 2878 2 846 5628 t
1049 ( people have overcome the dif\255)5 1253( Some)1 282(string to `I' \(borrowing a little syntax from the substitute command\).)10 2785 3 720 5748 t
1050 ( expressions,)1 530(ficulty of selecting words or identifiers using regular expressions by adding notation to the)13 3790 2 720 5868 t
1051 ( the precise definition of `identifier' is immutable in the implementation.)10 3006(which has the disadvantage that)4 1314 2 720 5988 t
1052 (With)720 6108 w
1053 10 CW f
1054 (sam)945 6108 w
1055 10 R f
1056 (, the definition is part of the program and easy to change, although more long\255winded.)14 3442 1 1125 6108 t
1057 (The program to capitalize `i's should be writable as)8 2057 1 970 6264 t
1058 10 CW f
1059 (x/[A\255Za\255z]+/ g/^i$/ c/I/)2 1440 1 1080 6444 t
1060 10 R f
1061 (That is, the definition of)4 977 1 720 6624 t
1062 10 CW f
1063 (^)1724 6624 w
1064 10 R f
1065 (and)1811 6624 w
1066 10 CW f
1067 ($)1982 6624 w
1068 10 R f
1069 ( compatibility and because of)4 1188( For)1 192( input.)1 259(should reflect the structure of the)5 1332 4 2069 6624 t
1070 (some problems in the implementation, however,)5 1929 1 720 6744 t
1071 10 CW f
1072 (^)2674 6744 w
1073 10 R f
1074 (and)2759 6744 w
1075 10 CW f
1076 ($)2928 6744 w
1077 10 R f
1078 (in)3013 6744 w
1079 10 CW f
1080 (sam)3116 6744 w
1081 10 R f
1082 (always match line boundaries.)3 1209 1 3321 6744 t
1083 (In)970 6900 w
1084 10 CW f
1085 (ed)1078 6900 w
1086 10 R f
1087 ( each global is still)4 754(, it would not be very useful to nest global commands because the `output' of)14 3088 2 1198 6900 t
1088 ( However,)1 445(a line.)1 249 2 720 7020 t
1089 10 CW f
1090 (sam)1444 7020 w
1091 10 R f
1092 ( benefit comes from separating)4 1256( \(This)1 266('s extract commands can be nested effectively.)6 1894 3 1624 7020 t
1093 ( problem of changing all occurrences of the variable)8 2131( the)1 152( Consider)1 416(the notions of looping and matching.\))5 1530 4 720 7140 t
1094 10 CW f
1095 (n)4980 7140 w
1096 10 R f
1097 (in a C program to some other name, say)8 1595 1 720 7260 t
1098 10 CW f
1099 (num)2340 7260 w
1100 10 R f
1101 ( method above will work)4 999(. The)1 230 2 2520 7260 t
1102 10 S1 f
1103 (\320)3774 7260 w
1104 cleartomark
1105 showpage
1106 saveobj restore
1107 %%EndPage: 4 4
1108 %%Page: 5 5
1109 /saveobj save def
1110 mark
1111 5 pagesetup
1112 10 R f
1113 (\255 5 \255)2 166 1 2797 480 t
1114 10 CW f
1115 (x/[a\255zA\255Z0\2559]+/ g/n/ v/../ c/num/)3 1980 1 1080 900 t
1116 10 S1 f
1117 (\320)720 1080 w
1118 10 R f
1119 ( are places in C where the `identifier')7 1508(except that there)2 663 2 847 1080 t
1120 10 CW f
1121 (n)3046 1080 w
1122 10 R f
1123 (occurs but not as a variable, in particular as the)9 1906 1 3134 1080 t
1124 (constant)720 1200 w
1125 10 CW f
1126 (\\n)1081 1200 w
1127 10 R f
1128 ( cou\255)1 204( prevent incorrect changes, the command can be prefixed by a)10 2503( To)1 164(in characters or strings.)3 940 4 1229 1200 t
1129 (ple of)1 230 1 720 1320 t
1130 10 CW f
1131 (y)975 1320 w
1132 10 R f
1133 (commands to weed out characters and strings:)6 1841 1 1060 1320 t
1134 10 CW f
1135 (y/".*"/ y/'.*'/ x/[a\255zA\255Z0\2559]+/ g/n/ v/../ c/num/)5 2940 1 1080 1500 t
1136 10 R f
1137 (This example illustrates the power of composing extractions and conditionals, but it is not artificial: it)15 4070 1 970 1716 t
1138 (was encountered when editing a real program \(in fact,)8 2192 1 720 1836 t
1139 10 CW f
1140 (sam)2942 1836 w
1141 10 R f
1142 ( with shell pipe\255)3 659( is an obvious analogy)4 914(\). There)1 345 3 3122 1836 t
1143 (lines, but these command)3 1019 1 720 1956 t
1144 10 I f
1145 (chains)1765 1956 w
1146 10 R f
1147 (are subtly)1 393 1 2052 1956 t
1148 10 S1 f
1149 (\320)2472 1956 w
1150 10 R f
1151 (and importantly)1 638 1 2599 1956 t
1152 10 S1 f
1153 (\320)3264 1956 w
1154 10 R f
1155 ( flows into)2 432( Data)1 240(different from pipelines.)2 977 3 3391 1956 t
1156 ( chains, the data flow is implicit:)6 1338( In)1 138( pipeline and emerges transformed from the right end.)8 2194(the left end of a)4 650 4 720 2076 t
1157 ( commands are operating on the same data \(except that the last element of the chain may modify the)18 4070(all the)1 250 2 720 2196 t
1158 ( is being)2 345( What)1 269( flows through the chain.)4 1008(text\); the complete operation is done in place; and no data actually)11 2698 4 720 2316 t
1159 ( in the)2 262(passed from link to link in the chain is a view of the data, until it looks right for the final command)21 4058 2 720 2436 t
1160 ( data stays the same, only the structure is modified.)9 2045(chain. The)1 446 2 720 2556 t
1161 10 B f
1162 (More than one line, and less than one line)8 1771 1 720 2796 t
1163 10 R f
1164 (The standard)1 532 1 970 2952 t
1165 9 R f
1166 (UNIX)1539 2952 w
1167 10 R f
1168 (tools have difficulty handling several lines at a time, if they can do so at all.)15 3237 1 1803 2952 t
1169 10 CW f
1170 (Grep)720 3072 w
1171 10 R f
1172 (,)960 3072 w
1173 10 CW f
1174 (sort)1022 3072 w
1175 10 R f
1176 (and)1299 3072 w
1177 10 CW f
1178 (diff)1480 3072 w
1179 10 R f
1180 ( if they could operate on larger)6 1296(work on lines only, although it would be useful)8 1987 2 1757 3072 t
1181 ( as a)2 197(pieces, such)1 491 2 720 3192 t
1182 10 CW f
1183 (refer)1443 3192 w
1184 10 R f
1185 (database.)1778 3192 w
1186 10 CW f
1187 (awk)2206 3192 w
1188 10 R f
1189 (can be tricked into accepting multiple\255line records, but then the)9 2619 1 2421 3192 t
1190 ( sub\255pieces \(typically ordinary lines\) by explicit code.)7 2236(actions must break out the)4 1101 2 720 3312 t
1191 10 CW f
1192 (sed)4119 3312 w
1193 10 R f
1194 (has a unique and)3 704 1 4336 3312 t
1195 (clumsy mechanism for manipulating multiple lines, which few have mastered.)9 3127 1 720 3432 t
1196 ( a)1 84( Consider)1 426(Structural expressions make it easy to specify multiple\255line actions.)8 2812 3 970 3588 t
1197 10 CW f
1198 (refer)4332 3588 w
1199 10 R f
1200 (database,)4672 3588 w
1201 ( percent sign and)3 685( line of a record begins with a)7 1210( Each)1 252(which has multi\255line records separated by blank lines.)7 2173 4 720 3708 t
1202 ( the line:)2 366(a character indicating the type of information on)7 1981 2 720 3828 t
1203 10 CW f
1204 (A)3100 3828 w
1205 10 R f
1206 (for author,)1 429 1 3193 3828 t
1207 10 CW f
1208 (T)3655 3828 w
1209 10 R f
1210 ( with)1 211( Staying)1 364(for title, etc.)2 504 3 3748 3828 t
1211 10 CW f
1212 (sam)4860 3828 w
1213 10 R f
1214 (notation, the command to search a)5 1370 1 720 3948 t
1215 10 CW f
1216 (refer)2115 3948 w
1217 10 R f
1218 (database for all papers written by Bimmler is:)7 1828 1 2440 3948 t
1219 10 CW f
1220 (x/\(.+\\n\)+/ g/%A.*Bimmler/p)1 1560 1 1080 4128 t
1221 10 S1 f
1222 (\320)720 4308 w
1223 10 R f
1224 ( set of lines containing `Bimm\255)5 1257(break the file into non\255empty sequences of non\255empty lines and print any)11 2937 2 846 4308 t
1225 ( be compatible with the other tools, a `)8 1572( \(To)1 198( after `%A'.)2 486(ler' on a line)3 522 4 720 4428 t
1226 10 CW f
1227 (.)3498 4428 w
1228 10 R f
1229 ( Except)1 331(' does not match a newline.\))5 1151 2 3558 4428 t
1230 (for the structural expression, this is a regular)7 1836 1 720 4548 t
1231 10 CW f
1232 (grep)2589 4548 w
1233 10 R f
1234 ( that)1 184(operation, implying)1 797 2 2862 4548 t
1235 10 CW f
1236 (grep)3877 4548 w
1237 10 R f
1238 (could benefit from an)3 889 1 4151 4548 t
1239 ( `stream)1 339( the short term, however, a)5 1147( In)1 149(additional regular expression to define the structure of its input.)9 2685 4 720 4668 t
1240 10 CW f
1241 (sam)720 4788 w
1242 10 R f
1243 (,' analogous to)2 591 1 900 4788 t
1244 10 CW f
1245 (sed,)1516 4788 w
1246 10 R f
1247 (would be convenient, and is currently being implemented.)7 2322 1 1781 4788 t
1248 ( example, we can)3 713( For)1 196( search program.)2 681(The ability to compose expressions makes it easy to tune the)10 2480 4 970 4944 t
1249 (select just the)2 544 1 720 5064 t
1250 10 I f
1251 (titles)1289 5064 w
1252 10 R f
1253 (of the papers written by Bimmler by applying another extraction:)9 2605 1 1509 5064 t
1254 10 CW f
1255 (x/\(.+\\n\)+/ g/%A.*Bimmler/ x/.*\\n/ g/%T/p)3 2400 1 1080 5244 t
1256 10 R f
1257 ( into individual lines, then prints the lines con\255)8 1912(This program breaks the records with author Bimmler back)8 2408 2 720 5424 t
1258 (taining)720 5544 w
1259 10 CW f
1260 (%T)1023 5544 w
1261 10 R f
1262 (.)1143 5544 w
1263 ( examples of multiple\255line components of files that may profitably be extracted,)11 3212(There are many other)3 858 2 970 5700 t
1264 (such as C functions, messages in mail boxes, paragraphs in)9 2415 1 720 5820 t
1265 10 CW f
1266 (troff)3166 5820 w
1267 10 R f
1268 ( records in on\255line telephone)4 1162(input and)1 381 2 3497 5820 t
1269 ( that, unlike in systems that define file structures)8 1948(books. Note)1 509 2 720 5940 t
1270 10 I f
1271 (a priori)1 310 1 3203 5940 t
1272 10 R f
1273 (, the structures are applied by the pro\255)7 1527 1 3513 5940 t
1274 ( sometimes a C)3 644( means the structure can change from application to application;)9 2658( This)1 239(gram, not the data.)3 779 4 720 6060 t
1275 ( and sometimes it is just a byte)7 1343(program is an array of functions, but sometimes it is an array of lines,)13 2977 2 720 6180 t
1276 (stream.)720 6300 w
1277 ( determine the appearance of their input,)6 1627(If the standard commands admitted a structural expression to)8 2443 2 970 6456 t
1278 ( a version of)3 553(many currently annoying problems could become simple: imagine)7 2790 2 720 6576 t
1279 10 CW f
1280 (diff)4107 6576 w
1281 10 R f
1282 (that could print)2 649 1 4391 6576 t
1283 ( or functions instead of changed lines, or a)8 1710(changed sentences)1 740 2 720 6696 t
1284 10 CW f
1285 (sort)3197 6696 w
1286 10 R f
1287 (that could sort a)3 647 1 3464 6696 t
1288 10 CW f
1289 (refer)4138 6696 w
1290 10 R f
1291 (database. The)1 575 1 4465 6696 t
1292 (case of)1 281 1 720 6816 t
1293 10 CW f
1294 (sort)1028 6816 w
1295 10 R f
1296 ( the input records be described by a struc\255)8 1676(is particularly interesting: not only can the shape of)8 2069 2 1295 6816 t
1297 ( current bewildering maze of options to control the)8 2062( The)1 209(tural expression, but also the shape of the sort key.)9 2049 3 720 6936 t
1298 ( be largely replaced by a structural expression to extract the key from the record, with)15 3434(sort could in principle)3 886 2 720 7056 t
1299 (multiple expressions to define multiple keys.)5 1794 1 720 7176 t
1300 cleartomark
1301 showpage
1302 saveobj restore
1303 %%EndPage: 5 5
1304 %%Page: 6 6
1305 /saveobj save def
1306 mark
1307 6 pagesetup
1308 10 R f
1309 (\255 6 \255)2 166 1 2797 480 t
1310 10 B f
1311 (The)720 840 w
1312 10 CW f
1313 (awk)912 840 w
1314 10 B f
1315 (of the future?)2 582 1 1117 840 t
1316 10 R f
1317 (It is entertaining to imagine a version of)7 1622 1 970 996 t
1318 10 CW f
1319 (awk)2621 996 w
1320 10 R f
1321 ( as discussed)2 524( First,)1 263(that applies these ideas throughout.)4 1423 3 2830 996 t
1322 ( For)1 199( to the actions would be defined, rather than merely selected, by the patterns.)13 3196(earlier, the text passed)3 925 3 720 1116 t
1323 (example,)720 1236 w
1324 10 CW f
1325 (/#+/ { print })3 840 1 1080 1416 t
1326 10 R f
1327 (would print only)2 667 1 720 1596 t
1328 10 CW f
1329 (#)1412 1596 w
1330 10 R f
1331 (characters; conventional)1 972 1 1497 1596 t
1332 10 CW f
1333 (awk)2494 1596 w
1334 10 R f
1335 (would instead print every line containing)5 1640 1 2699 1596 t
1336 10 CW f
1337 (#)4364 1596 w
1338 10 R f
1339 (characters.)4449 1596 w
1340 ( of using the restrictive idea of a)7 1317( Instead)1 342( parsed.)1 314(Next, the expressions would define how the input is)8 2097 4 970 1752 t
1341 ( instance, in)2 496( For)1 197( demarcate fields.)2 722(field separator, the iterations implied by closures in the expression can)10 2905 4 720 1872 t
1342 (the program)1 485 1 720 1992 t
1343 10 CW f
1344 (/\(.+\\n\)+/ {)1 660 1 1080 2172 t
1345 10 I f
1346 (action)1800 2172 w
1347 10 CW f
1348 (})2110 2172 w
1349 10 R f
1350 ( lines, but the outermost closure \(the)6 1481(the action sees groups of)4 999 2 720 2352 t
1351 10 CW f
1352 (+)3229 2352 w
1353 10 R f
1354 (operator\) examines, and hence can extract,)5 1722 1 3318 2352 t
1355 (the individual lines.)2 802 1 720 2472 t
1356 10 CW f
1357 (ed)1577 2472 w
1358 10 R f
1359 ( We)1 192( back\255referencing operators.)2 1128(uses parentheses to define sub\255expressions for its)6 1993 3 1727 2472 t
1360 ( to define the `fields' in)5 980(can modify this idea)3 834 2 720 2592 t
1361 10 CW f
1362 (awk)2567 2592 w
1363 10 R f
1364 (, so)1 147 1 2747 2592 t
1365 10 CW f
1366 ($1)2927 2592 w
1367 10 R f
1368 (defines the first element of the closure \(the first)8 1960 1 3080 2592 t
1369 (line\),)720 2712 w
1370 10 CW f
1371 ($2)961 2712 w
1372 10 R f
1373 ( arrays, so the)3 575( interestingly, the closures could generate indices for)7 2163( More)1 274(the second, and so on.)4 914 4 1114 2712 t
1374 ( say,)1 191(fields would be called,)3 925 2 720 2832 t
1375 10 CW f
1376 (input[1])1869 2832 w
1377 10 R f
1378 (and so on, perhaps with the unadorned identifier)7 1986 1 2382 2832 t
1379 10 CW f
1380 (input)4401 2832 w
1381 10 R f
1382 (holding)4734 2832 w
1383 ( generate multi\255dimensional)2 1163( has the advantage that nested closures can)7 1858( This)1 250(the original intact string.)3 1049 4 720 2952 t
1384 ( is some subtlety involving the relationship between)7 2192( \(There)1 331(arrays, which is notationally clean.)4 1456 3 720 3072 t
1385 10 CW f
1386 (input)4740 3072 w
1387 10 R f
1388 (indices and the order of the closures in the pattern, but the details are not important here.\))16 3571 1 720 3192 t
1389 (Finally, as in)2 524 1 970 3348 t
1390 10 CW f
1391 (sam)1521 3348 w
1392 10 R f
1393 ( expressions would be applicable to the output of structural expressions;)10 2910(, structural)1 429 2 1701 3348 t
1394 ( following program computes)3 1185( The)1 205( actions.)1 333(that is, we would be able to nest structural expressions inside the)11 2597 4 720 3468 t
1395 (how many pages of articles Bimmler has written:)7 1967 1 720 3588 t
1396 10 CW f
1397 ( break into records)3 1140(/\(.+\\n\)+/{ #)1 900 2 1080 3768 t
1398 ( is Bimmler author? \(see text\))5 1800( #)1 300(input ~ /%A.*Bimmler/{)2 1320 3 1330 3888 t
1399 ( extract page numbers)3 1260(/%P.*\([0\2559]+\)\255\([0\2559]+\)/{ #)1 1740 2 1580 4008 t
1400 (pages+=input[2]\255input[1]+1)1830 4128 w
1401 (})1580 4248 w
1402 (})1330 4368 w
1403 (})1080 4488 w
1404 (END{)1080 4608 w
1405 (print pages)1 660 1 1330 4728 t
1406 (})1080 4848 w
1407 10 R f
1408 (Real)720 5028 w
1409 10 CW f
1410 (awk)935 5028 w
1411 10 R f
1412 ( \(that is, regular expressions\) only like)6 1582(uses patterns)1 520 2 1147 5028 t
1413 10 CW f
1414 (sam)3282 5028 w
1415 10 R f
1416 ('s)3462 5028 w
1417 10 CW f
1418 (g)3567 5028 w
1419 10 R f
1420 (command, but our)2 746 1 3660 5028 t
1421 10 CW f
1422 (awk)4439 5028 w
1423 10 R f
1424 ('s patterns)1 421 1 4619 5028 t
1425 (are)720 5148 w
1426 10 CW f
1427 (x)871 5148 w
1428 10 R f
1429 ( is why in the pro\255)5 750( This)1 232( we need both to exploit structural expressions well.)8 2110(expressions. Obviously,)1 987 4 961 5148 t
1430 (gram above the test for whether)5 1276 1 720 5268 t
1431 10 CW f
1432 (input)2024 5268 w
1433 10 R f
1434 (contains a paper by Bimmler must be written as an explicit pattern)11 2688 1 2352 5268 t
1435 ( separated by a dash, which is how)7 1393( innermost pattern searches for lines containing two numbers)8 2451(match. The)1 476 3 720 5388 t
1436 10 CW f
1437 (refer)720 5508 w
1438 10 R f
1439 (stores the starting and ending pages of the article.)8 1977 1 1045 5508 t
1440 ( real)1 178( The)1 209(This is a contrived example, of course, but it illustrates the basic ideas.)12 2875 3 970 5664 t
1441 10 CW f
1442 (awk)4261 5664 w
1443 10 R f
1444 (suffers from a)2 569 1 4471 5664 t
1445 ( making the parsing actions of the)6 1366( would be improved by)4 939( It)1 114(mismatch between the patterns and the actions.)6 1901 4 720 5784 t
1446 ( lan\255)1 185( A)1 127( pattern\255matching abilities available in the actions.)6 2047(patterns visible in the actions, and by having the)8 1961 4 720 5904 t
1447 (guage with regular expressions should not base its text manipulation on a)11 2928 1 720 6024 t
1448 10 CW f
1449 (substr)3673 6024 w
1450 10 R f
1451 (function.)4058 6024 w
1452 10 B f
1453 (Comments)720 6264 w
1454 10 R f
1455 ( is a powerful and convenient, if unfa\255)7 1545(The use of regular expressions to describe the structure of files)10 2525 2 970 6420 t
1456 ( current)1 308(miliar, way to address a number of difficulties the)8 2010 2 720 6540 t
1457 9 R f
1458 (UNIX)3062 6540 w
1459 10 R f
1460 ( is obviously around this)4 988( There)1 283(tools share.)1 456 3 3313 6540 t
1461 ( all.)1 172(new notation a number of interesting problems, and I am not pretending to have addressed them)15 4148 2 720 6660 t
1462 ( the possibilities,)2 678(Rather, I have skipped enthusiastically from example to example to indicate the breadth of)13 3642 2 720 6780 t
1463 ( these ideas, and perhaps to)5 1117( hope is to encourage others to think about)8 1730( My)1 193(not the depth of the difficulties.)5 1280 4 720 6900 t
1464 (apply them to old tools as well as new ones.)9 1760 1 720 7020 t
1465 cleartomark
1466 showpage
1467 saveobj restore
1468 %%EndPage: 6 6
1469 %%Page: 7 7
1470 /saveobj save def
1471 mark
1472 7 pagesetup
1473 10 R f
1474 (\255 7 \255)2 166 1 2797 480 t
1475 10 B f
1476 (Acknowledgements)720 840 w
1477 10 R f
1478 ( some of their ideas)4 806(John Linderman, Chris Van Wyk, Tom Duff and Norman Wilson will recognize)11 3264 2 970 996 t
1479 ( hope I have not misrepresented them.)6 1522( I)1 83(in these notes.)2 569 3 720 1116 t
1480 cleartomark
1481 showpage
1482 saveobj restore
1483 %%EndPage: 7 7
1484 %%Trailer
1485 done
1486 %%Pages: 7
1487 %%DocumentFonts: Times-Roman Times-Bold Times-Italic Times-Roman Courier