sed.c - sbase - suckless unix tools
(HTM) git clone git://git.suckless.org/sbase
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
sed.c (41896B)
---
1 /* FIXME: summary
2 * decide whether we enforce valid UTF-8, right now it's enforced in certain
3 * parts of the script, but not the input...
4 * nul bytes cause explosions due to use of libc string functions. thoughts?
5 * lack of newline at end of file, currently we add one. what should we do?
6 * allow "\\t" for "\t" etc. in regex? in replacement text?
7 * POSIX says don't flush on N when out of input, but GNU and busybox do.
8 */
9
10 #include <ctype.h>
11 #include <errno.h>
12 #include <regex.h>
13 #include <stdlib.h>
14 #include <string.h>
15
16 #include "utf.h"
17 #include "util.h"
18
19 /* Types */
20
/* Growable array of untyped pointers.
 * Used as a queue for scheduled writes and as a stack for {, :, b and t.
 */
typedef struct {
	void   **data; /* element storage */
	size_t   size; /* number of slots in use */
	size_t   cap;  /* number of slots allocated */
} Vec;
27
/* Buffer that grows on demand; str is treated as a NUL-terminated C string.
 * FIXME: does it make sense to keep track of the length, or should we just
 * rely on libc string functions? Supporting nul bytes would change everything.
 */
typedef struct {
	char   *str; /* heap buffer, NULL until first allocation */
	size_t  cap; /* bytes allocated for str */
} String;
36
typedef struct Cmd Cmd;

/* Per-command dispatch record; the fns[] table holds one per sed function. */
typedef struct {
	void  (*fn)(Cmd *);             /* executes the command at run time */
	char *(*getarg)(Cmd *, char *); /* parses the argument, NULL if none */
	void  (*freearg)(Cmd *);        /* releases the parsed argument, NULL if nothing to free */
	unsigned char naddr;            /* maximum number of addresses accepted */
} Fninfo;
44
/* A single address; which member of u is valid depends on type. */
typedef struct {
	union {
		size_t   lineno; /* valid when type == LINE */
		regex_t *re;     /* valid when type == REGEX */
	} u;
	enum {
		IGNORE, /* empty address, ignore */
		EVERY,  /* every line */
		LINE,   /* line number */
		LAST,   /* last line ($) */
		REGEX,  /* use included regex */
		LASTRE, /* use most recently used regex */
	} type;
} Addr;
59
60 /* DISCUSS: naddr is not strictly necessary, but very helpful
61 * naddr == 0 iff beg.type == EVERY && end.type == IGNORE
62 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
63 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
64 */
65 typedef struct {
66 Addr beg;
67 Addr end;
68 unsigned char naddr;
69 } Range;
70
71 typedef struct {
72 regex_t *re; /* if NULL use last regex */
73 String repl;
74 FILE *file;
75 size_t occurrence; /* 0 for all (g flag) */
76 Rune delim;
77 unsigned int p:1;
78 } Sarg;
79
80 typedef struct {
81 Rune *set1;
82 Rune *set2;
83 } Yarg;
84
85 typedef struct {
86 String str; /* a,c,i text. r file path */
87 void (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
88 } ACIRarg;
89
90 struct Cmd {
91 Range range;
92 Fninfo *fninfo;
93 union {
94 Cmd *jump; /* used for b,t when running */
95 char *label; /* used for :,b,t when building */
96 ptrdiff_t offset; /* used for { (pointers break during realloc) */
97 FILE *file; /* used for w */
98
99 /* FIXME: Should the following be in the union? or pointers and malloc? */
100 Sarg s;
101 Yarg y;
102 ACIRarg acir;
103 } u; /* I find your lack of anonymous unions disturbing */
104 unsigned int in_match:1;
105 unsigned int negate :1;
106 };
107
/* An output file opened for the w command (or the w flag of s). */
typedef struct {
	char *path; /* name as written in the script */
	FILE *file; /* stream opened on path */
} Wfile;
113
114 /*
115 * Function Declarations
116 */
117
118 /* Dynamically allocated arrays and strings */
119 static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
120 static void *pop(Vec *v);
121 static void push(Vec *v, void *p);
122 static void stracat(String *dst, char *src);
123 static void strnacat(String *dst, char *src, size_t n);
124 static void stracpy(String *dst, char *src);
125
126 /* Cleanup and errors */
127 static void usage(void);
128
129 /* Parsing functions and related utilities */
130 static void compile(char *s, int isfile);
131 static int read_line(FILE *f, String *s);
132 static char *make_range(Range *range, char *s);
133 static char *make_addr(Addr *addr, char *s);
134 static char *find_delim(char *s, Rune delim, int do_brackets);
135 static char *chompr(char *s, Rune rune);
136 static char *chomp(char *s);
137 static Rune *strtorunes(char *s, size_t nrunes);
138 static long stol(char *s, char **endp);
139 static size_t escapes(char *beg, char *end, Rune delim, int n_newline);
140 static size_t echarntorune(Rune *r, char *s, size_t n);
141 static void insert_labels(void);
142
143 /* Get and Free arg and related utilities */
144 static char *get_aci_arg(Cmd *c, char *s);
145 static void aci_append(Cmd *c, char *s);
146 static void free_acir_arg(Cmd *c);
147 static char *get_bt_arg(Cmd *c, char *s);
148 static char *get_r_arg(Cmd *c, char *s);
149 static char *get_s_arg(Cmd *c, char *s);
150 static void free_s_arg(Cmd *c);
151 static char *get_w_arg(Cmd *c, char *s);
152 static char *get_y_arg(Cmd *c, char *s);
153 static void free_y_arg(Cmd *c);
154 static char *get_colon_arg(Cmd *c, char *s);
155 static char *get_lbrace_arg(Cmd *c, char *s);
156 static char *get_rbrace_arg(Cmd *c, char *s);
157 static char *semicolon_arg(char *s);
158
159 /* Running */
160 static void run(void);
161 static int in_range(Cmd *c);
162 static int match_addr(Addr *a);
163 static int next_file(void);
164 static int is_eof(FILE *f);
165 static void do_writes(void);
166 static void write_file(char *path, FILE *out);
167 static void check_puts(char *s, FILE *f);
168 static void update_ranges(Cmd *beg, Cmd *end);
169
170 /* Sed functions */
171 static void cmd_y(Cmd *c);
172 static void cmd_x(Cmd *c);
173 static void cmd_w(Cmd *c);
174 static void cmd_t(Cmd *c);
175 static void cmd_s(Cmd *c);
176 static void cmd_r(Cmd *c);
177 static void cmd_q(Cmd *c);
178 static void cmd_P(Cmd *c);
179 static void cmd_p(Cmd *c);
180 static void cmd_N(Cmd *c);
181 static void cmd_n(Cmd *c);
182 static void cmd_l(Cmd *c);
183 static void cmd_i(Cmd *c);
184 static void cmd_H(Cmd *c);
185 static void cmd_h(Cmd *c);
186 static void cmd_G(Cmd *c);
187 static void cmd_g(Cmd *c);
188 static void cmd_D(Cmd *c);
189 static void cmd_d(Cmd *c);
190 static void cmd_c(Cmd *c);
191 static void cmd_b(Cmd *c);
192 static void cmd_a(Cmd *c);
193 static void cmd_colon(Cmd *c);
194 static void cmd_equal(Cmd *c);
195 static void cmd_lbrace(Cmd *c);
196 static void cmd_rbrace(Cmd *c);
197 static void cmd_last(Cmd *c);
198
199 /* Actions */
200 static void new_line(void);
201 static void app_line(void);
202 static void new_next(void);
203 static void old_next(void);
204
205 /*
206 * Globals
207 */
208 static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, bt */
209 static Vec writes; /* holds cmd*. writes scheduled by a and r commands */
210 static Vec wfiles; /* holds Wfile*. files for w and s///w commands */
211
212 static Cmd *prog, *pc; /* Program, program counter */
213 static size_t pcap;
214 static size_t lineno;
215
216 static regex_t *lastre; /* last used regex for empty regex search */
217 static char **files; /* list of file names from argv */
218 static FILE *file; /* current file we are reading */
219 static int ret; /* exit status */
220
221 static String patt, hold, genbuf;
222
223 static struct {
224 unsigned int n :1; /* -n (no print) */
225 unsigned int E :1; /* -E (extended re) */
226 unsigned int s :1; /* s/// replacement happened */
227 unsigned int aci_cont:1; /* a,c,i text continuation */
228 unsigned int s_cont :1; /* s/// replacement text continuation */
229 unsigned int halt :1; /* halt execution */
230 } gflags;
231
232 /* FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes */
233 static Fninfo fns[] = {
234 ['a'] = { cmd_a , get_aci_arg , free_acir_arg , 1 }, /* schedule write of text for later */
235 ['b'] = { cmd_b , get_bt_arg , NULL , 2 }, /* branch to label char *label when building, Cmd *jump when running */
236 ['c'] = { cmd_c , get_aci_arg , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text */
237 ['d'] = { cmd_d , NULL , NULL , 2 }, /* delete pattern space */
238 ['D'] = { cmd_D , NULL , NULL , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d) */
239 ['g'] = { cmd_g , NULL , NULL , 2 }, /* replace pattern space with hold space */
240 ['G'] = { cmd_G , NULL , NULL , 2 }, /* append newline and hold space to pattern space */
241 ['h'] = { cmd_h , NULL , NULL , 2 }, /* replace hold space with pattern space */
242 ['H'] = { cmd_H , NULL , NULL , 2 }, /* append newline and pattern space to hold space */
243 ['i'] = { cmd_i , get_aci_arg , free_acir_arg , 1 }, /* write text */
244 ['l'] = { cmd_l , NULL , NULL , 2 }, /* write pattern space in 'visually unambiguous form' */
245 ['n'] = { cmd_n , NULL , NULL , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit) */
246 ['N'] = { cmd_N , NULL , NULL , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
247 ['p'] = { cmd_p , NULL , NULL , 2 }, /* write pattern space */
248 ['P'] = { cmd_P , NULL , NULL , 2 }, /* write pattern space up to first newline */
249 ['q'] = { cmd_q , NULL , NULL , 1 }, /* quit */
250 ['r'] = { cmd_r , get_r_arg , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file) */
251 ['s'] = { cmd_s , get_s_arg , free_s_arg , 2 }, /* find/replace/all that crazy s stuff */
252 ['t'] = { cmd_t , get_bt_arg , NULL , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
253 ['w'] = { cmd_w , get_w_arg , NULL , 2 }, /* append pattern space to file */
254 ['x'] = { cmd_x , NULL , NULL , 2 }, /* exchange pattern and hold spaces */
255 ['y'] = { cmd_y , get_y_arg , free_y_arg , 2 }, /* replace runes in set1 with runes in set2 */
256 [':'] = { cmd_colon , get_colon_arg , NULL , 0 }, /* defines label for later b and t commands */
257 ['='] = { cmd_equal , NULL , NULL , 1 }, /* printf("%d\n", line_number); */
258 ['{'] = { cmd_lbrace, get_lbrace_arg, NULL , 2 }, /* if we match, run commands, otherwise jump to close */
259 ['}'] = { cmd_rbrace, get_rbrace_arg, NULL , 0 }, /* noop, hold onto open for ease of building scripts */
260
261 [0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
262 };
263
264 /*
265 * Function Definitions
266 */
267
/* given memory pointed to by *ptr that currently holds *nmemb members of size
 * size, realloc to hold new_nmemb members, return new_nmemb in *nmemb and one
 * past old end in *next (if next is not NULL). if realloc fails...explode
 *
 * when new_nmemb is 0 the memory is freed, *ptr becomes NULL and *next is set
 * to NULL instead of doing pointer arithmetic on a null pointer
 */
static void
resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
{
	void *tmp = NULL;

	if (new_nmemb)
		tmp = ereallocarray(*ptr, new_nmemb, size);
	else /* turns out realloc(*ptr, 0) != free(*ptr) */
		free(*ptr);

	if (next)
		*next = tmp ? (void *)((char *)tmp + *nmemb * size) : NULL;
	*nmemb = new_nmemb;
	*ptr = tmp;
}
289
290 static void *
291 pop(Vec *v)
292 {
293 if (!v->size)
294 return NULL;
295 return v->data[--v->size];
296 }
297
298 static void
299 push(Vec *v, void *p)
300 {
301 if (v->size == v->cap)
302 resize((void **)&v->data, &v->cap, sizeof(*v->data), v->cap * 2 + 1, NULL);
303 v->data[v->size++] = p;
304 }
305
306 static void
307 stracat(String *dst, char *src)
308 {
309 int new = !dst->cap;
310 size_t len;
311
312 len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1;
313 if (dst->cap < len)
314 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
315 if (new)
316 *dst->str = '\0';
317 strcat(dst->str, src);
318 }
319
320 static void
321 strnacat(String *dst, char *src, size_t n)
322 {
323 int new = !dst->cap;
324 size_t len;
325
326 len = strlen(src);
327 len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1;
328 if (dst->cap < len)
329 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
330 if (new)
331 *dst->str = '\0';
332 strlcat(dst->str, src, len);
333 }
334
335 static void
336 stracpy(String *dst, char *src)
337 {
338 size_t len;
339
340 len = strlen(src) + 1;
341 if (dst->cap < len)
342 resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
343 strcpy(dst->str, src);
344 }
345
346 static void
347 leprintf(char *s)
348 {
349 if (errno)
350 eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
351 else
352 eprintf("%zu: %s\n", lineno, s);
353 }
354
/* print the three accepted invocation forms through eprintf */
static void
usage(void)
{
	eprintf(
		"usage: sed [-nrE] script [file ...]\n"
		"       sed [-nrE] -e script [-e script] ... [-f scriptfile] ... [file ...]\n"
		"       sed [-nrE] [-e script] ... -f scriptfile [-f scriptfile] ... [file ...]\n"
	);
}
363
364 /* Differences from POSIX
365 * we allows semicolons and trailing blanks inside {}
366 * we allow spaces after ! (and in between !s)
367 * we allow extended regular expressions (-E)
368 */
369 static void
370 compile(char *s, int isfile)
371 {
372 FILE *f;
373
374 if (isfile) {
375 f = fopen(s, "r");
376 if (!f)
377 eprintf("fopen %s:", s);
378 } else {
379 if (!*s) /* empty string script */
380 return;
381 f = fmemopen(s, strlen(s), "r");
382 if (!f)
383 eprintf("fmemopen:");
384 }
385
386 /* NOTE: get arg functions can't use genbuf */
387 while (read_line(f, &genbuf) != EOF) {
388 s = genbuf.str;
389
390 /* if the first two characters of the script are "#n" default output shall be suppressed */
391 if (++lineno == 1 && *s == '#' && s[1] == 'n') {
392 gflags.n = 1;
393 continue;
394 }
395
396 if (gflags.aci_cont) {
397 aci_append(pc - 1, s);
398 continue;
399 }
400 if (gflags.s_cont)
401 s = (pc - 1)->fninfo->getarg(pc - 1, s);
402
403 while (*s) {
404 s = chompr(s, ';');
405 if (!*s || *s == '#')
406 break;
407
408 if ((size_t)(pc - prog) == pcap)
409 resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);
410
411 pc->range.beg.type = pc->range.end.type = IGNORE;
412 pc->fninfo = NULL;
413 pc->in_match = 0;
414
415 s = make_range(&pc->range, s);
416 s = chomp(s);
417 pc->negate = *s == '!';
418 s = chompr(s, '!');
419
420 if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
421 leprintf("bad sed function");
422 if (pc->range.naddr > pc->fninfo->naddr)
423 leprintf("wrong number of addresses");
424 s++;
425
426 if (pc->fninfo->getarg)
427 s = pc->fninfo->getarg(pc, s);
428
429 pc++;
430 }
431 }
432
433 fshut(f, s);
434 }
435
436 /* FIXME: if we decide to honor lack of trailing newline, set/clear a global
437 * flag when reading a line
438 */
439 static int
440 read_line(FILE *f, String *s)
441 {
442 ssize_t len;
443
444 if (!f)
445 return EOF;
446
447 if ((len = getline(&s->str, &s->cap, f)) < 0) {
448 if (ferror(f))
449 eprintf("getline:");
450 return EOF;
451 }
452 if (s->str[--len] == '\n')
453 s->str[len] = '\0';
454 return 0;
455 }
456
457 /* read first range from s, return pointer to one past end of range */
458 static char *
459 make_range(Range *range, char *s)
460 {
461 s = make_addr(&range->beg, s);
462
463 if (*s == ',')
464 s = make_addr(&range->end, s + 1);
465 else
466 range->end.type = IGNORE;
467
468 if (range->beg.type == EVERY && range->end.type == IGNORE) range->naddr = 0;
469 else if (range->beg.type != IGNORE && range->end.type == IGNORE) range->naddr = 1;
470 else if (range->beg.type != IGNORE && range->end.type != IGNORE) range->naddr = 2;
471 else leprintf("this is impossible...");
472
473 return s;
474 }
475
476 /* read first addr from s, return pointer to one past end of addr */
477 static char *
478 make_addr(Addr *addr, char *s)
479 {
480 Rune r;
481 char *p = s + strlen(s);
482 size_t rlen = echarntorune(&r, s, p - s);
483
484 if (r == '$') {
485 addr->type = LAST;
486 s += rlen;
487 } else if (isdigitrune(r)) {
488 addr->type = LINE;
489 addr->u.lineno = stol(s, &s);
490 } else if (r == '/' || r == '\\') {
491 Rune delim;
492 if (r == '\\') {
493 s += rlen;
494 rlen = echarntorune(&r, s, p - s);
495 }
496 if (r == '\\')
497 leprintf("bad delimiter '\\'");
498 delim = r;
499 s += rlen;
500 rlen = echarntorune(&r, s, p - s);
501 if (r == delim) {
502 addr->type = LASTRE;
503 s += rlen;
504 } else {
505 addr->type = REGEX;
506 p = find_delim(s, delim, 1);
507 if (!*p)
508 leprintf("unclosed regex");
509 p -= escapes(s, p, delim, 0);
510 *p++ = '\0';
511 addr->u.re = emalloc(sizeof(*addr->u.re));
512 eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED : 0);
513 s = p;
514 }
515 } else {
516 addr->type = EVERY;
517 }
518
519 return s;
520 }
521
522 /* return pointer to first delim in s that is not escaped
523 * and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
524 * return pointer to trailing nul byte if no delim found
525 *
526 * any escaped character that is not special is just itself (POSIX undefined)
527 * FIXME: pull out into some util thing, will be useful for ed as well
528 */
529 static char *
530 find_delim(char *s, Rune delim, int do_brackets)
531 {
532 enum {
533 OUTSIDE , /* not in brackets */
534 BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
535 BRACKETS_INSIDE , /* inside [] */
536 INSIDE_OPENING , /* inside [] and last char was [ */
537 CLASS_INSIDE , /* inside class [::], or colating element [..] or [==], inside [] */
538 CLASS_CLOSING , /* inside class [::], or colating element [..] or [==], and last character was the respective : . or = */
539 } state = OUTSIDE;
540
541 Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
542 size_t rlen;
543 int escape = 0;
544 char *end = s + strlen(s);
545
546 for (; *s; s += rlen) {
547 rlen = echarntorune(&r, s, end - s);
548
549 if (state == BRACKETS_OPENING && r == '^' ) { continue; }
550 else if (state == BRACKETS_OPENING && r == ']' ) { state = BRACKETS_INSIDE ; continue; }
551 else if (state == BRACKETS_OPENING ) { state = BRACKETS_INSIDE ; }
552
553 if (state == CLASS_CLOSING && r == ']' ) { state = BRACKETS_INSIDE ; }
554 else if (state == CLASS_CLOSING ) { state = CLASS_INSIDE ; }
555 else if (state == CLASS_INSIDE && r == c ) { state = CLASS_CLOSING ; }
556 else if (state == INSIDE_OPENING && (r == ':' ||
557 r == '.' ||
558 r == '=') ) { state = CLASS_INSIDE ; c = r; }
559 else if (state == INSIDE_OPENING && r == ']' ) { state = OUTSIDE ; }
560 else if (state == INSIDE_OPENING ) { state = BRACKETS_INSIDE ; }
561 else if (state == BRACKETS_INSIDE && r == '[' ) { state = INSIDE_OPENING ; }
562 else if (state == BRACKETS_INSIDE && r == ']' ) { state = OUTSIDE ; }
563 else if (state == OUTSIDE && escape ) { escape = 0 ; }
564 else if (state == OUTSIDE && r == '\\' ) { escape = 1 ; }
565 else if (state == OUTSIDE && r == delim) return s;
566 else if (state == OUTSIDE && do_brackets && r == '[' ) { state = BRACKETS_OPENING; }
567 }
568 return s;
569 }
570
/* eat all leading whitespace; chompr() with no extra rune to skip */
static char *
chomp(char *s)
{
	char *rest = chompr(s, 0);

	return rest;
}
576
577 /* eat all leading whitespace and occurrences of rune */
578 static char *
579 chompr(char *s, Rune rune)
580 {
581 Rune r;
582 size_t rlen;
583 char *end = s + strlen(s);
584
585 while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspacerune(r) || r == rune))
586 s += rlen;
587 return s;
588 }
589
590 /* convert first nrunes Runes from UTF-8 string s in allocated Rune*
591 * NOTE: sequence must be valid UTF-8, check first */
592 static Rune *
593 strtorunes(char *s, size_t nrunes)
594 {
595 Rune *rs, *rp;
596
597 rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs));
598
599 while (nrunes--)
600 s += chartorune(rp++, s);
601
602 *rp = '\0';
603 return rs;
604 }
605
/* strtol(3) in base 10 with error reporting through leprintf
 * *endp is set to the first character after the number
 * fails if strtol sets errno or if no digits were consumed
 */
static long
stol(char *s, char **endp)
{
	long n;

	errno = 0;
	n = strtol(s, endp, 10);

	/* leprintf adds ": <strerror>" itself, so no trailing colon here */
	if (errno)
		leprintf("strtol");
	if (*endp == s)
		leprintf("strtol: invalid number");

	return n;
}
620
621 /* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
622 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
623 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
624 * if delim is 0 all escaped characters represent themselves (aci text)
625 * memmove rest of string (beyond end) into place
626 * return the number of converted escapes (backslashes removed)
627 * FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
628 */
629 static size_t
630 escapes(char *beg, char *end, Rune delim, int n_newline)
631 {
632 size_t num = 0;
633 char *src = beg, *dst = beg;
634
635 while (src < end) {
636 /* handle escaped backslash specially so we don't think the second
637 * backslash is escaping something */
638 if (*src == '\\' && src[1] == '\\') {
639 *dst++ = *src++;
640 if (delim)
641 *dst++ = *src++;
642 else
643 src++;
644 } else if (*src == '\\' && !delim) {
645 src++;
646 } else if (*src == '\\' && src[1]) {
647 Rune r;
648 size_t rlen;
649 num++;
650 src++;
651 rlen = echarntorune(&r, src, end - src);
652
653 if (r == 'n' && delim == 'n') {
654 *src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
655 } else if (r == 'n') {
656 *src = '\n';
657 } else if (r != delim) {
658 *dst++ = '\\';
659 num--;
660 }
661
662 memmove(dst, src, rlen);
663 dst += rlen;
664 src += rlen;
665 } else {
666 *dst++ = *src++;
667 }
668 }
669 memmove(dst, src, strlen(src) + 1);
670 return num;
671 }
672
673 static size_t
674 echarntorune(Rune *r, char *s, size_t n)
675 {
676 size_t rlen = charntorune(r, s, n);
677 if (!rlen || *r == Runeerror)
678 leprintf("invalid UTF-8");
679 return rlen;
680 }
681
682 static void
683 insert_labels(void)
684 {
685 size_t i;
686 Cmd *from, *to;
687
688 while (branches.size) {
689 from = prog + (ptrdiff_t)pop(&branches);
690
691 if (!from->u.label) {/* no label branch to end of script */
692 from->u.jump = pc - 1;
693 } else {
694 for (i = 0; i < labels.size; i++) {
695 to = prog + (ptrdiff_t)labels.data[i];
696 if (!strcmp(from->u.label, to->u.label)) {
697 from->u.jump = to;
698 break;
699 }
700 }
701 if (i == labels.size)
702 leprintf("bad label");
703 }
704 }
705 }
706
707 /*
708 * Getargs / Freeargs
709 * Read argument from s, return pointer to one past last character of argument
710 */
711
712 /* POSIX compliant
713 * i\
714 * foobar
715 *
716 * also allow the following non POSIX compliant
717 * i # empty line
718 * ifoobar
719 * ifoobar\
720 * baz
721 *
722 * FIXME: GNU and busybox discard leading spaces
723 * i foobar
724 * i foobar
725 * ifoobar
726 * are equivalent in GNU and busybox. We don't. Should we?
727 */
728 static char *
729 get_aci_arg(Cmd *c, char *s)
730 {
731 c->u.acir.print = check_puts;
732 c->u.acir.str = (String){ NULL, 0 };
733
734 gflags.aci_cont = !!*s; /* no continue flag if empty string */
735
736 /* neither empty string nor POSIX compliant */
737 if (*s && !(*s == '\\' && !s[1]))
738 aci_append(c, s);
739
740 return s + strlen(s);
741 }
742
743 static void
744 aci_append(Cmd *c, char *s)
745 {
746 char *end = s + strlen(s), *p = end;
747
748 gflags.aci_cont = 0;
749 while (--p >= s && *p == '\\')
750 gflags.aci_cont = !gflags.aci_cont;
751
752 if (gflags.aci_cont)
753 *--end = '\n';
754
755 escapes(s, end, 0, 0);
756 stracat(&c->u.acir.str, s);
757 }
758
759 static void
760 free_acir_arg(Cmd *c)
761 {
762 free(c->u.acir.str.str);
763 }
764
765 /* POSIX dictates that label is rest of line, including semicolons, trailing
766 * whitespace, closing braces, etc. and can be limited to 8 bytes
767 *
768 * I allow a semicolon or closing brace to terminate a label name, it's not
769 * POSIX compliant, but it's useful and every sed version I've tried to date
770 * does the same.
771 *
772 * FIXME: POSIX dictates that leading whitespace is ignored but trailing
773 * whitespace is not. This is annoying and we should probably get rid of it.
774 */
775 static char *
776 get_bt_arg(Cmd *c, char *s)
777 {
778 char *p = semicolon_arg(s = chomp(s));
779
780 if (p != s) {
781 c->u.label = estrndup(s, p - s);
782 } else {
783 c->u.label = NULL;
784 }
785
786 push(&branches, (void *)(c - prog));
787
788 return p;
789 }
790
791 /* POSIX dictates file name is rest of line including semicolons, trailing
792 * whitespace, closing braces, etc. and file name must be preceded by a space
793 *
794 * I allow a semicolon or closing brace to terminate a file name and don't
795 * enforce leading space.
796 *
797 * FIXME: decide whether trailing whitespace should be included and fix
798 * accordingly
799 */
800 static char *
801 get_r_arg(Cmd *c, char *s)
802 {
803 char *p = semicolon_arg(s = chomp(s));
804
805 if (p == s)
806 leprintf("no file name");
807
808 c->u.acir.str.str = estrndup(s, p - s);
809 c->u.acir.print = write_file;
810
811 return p;
812 }
813
814 /* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
815 *
816 * FIXME: allow other escapes in regex and replacement? if so change escapes()
817 */
818 static char *
819 get_s_arg(Cmd *c, char *s)
820 {
821 Rune delim, r;
822 Cmd buf;
823 char *p;
824 int esc, lastre;
825
826 /* s/Find/Replace/Flags */
827
828 /* Find */
829 if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
830 lastre = 0;
831 c->u.s.repl = (String){ NULL, 0 };
832 c->u.s.occurrence = 1;
833 c->u.s.file = NULL;
834 c->u.s.p = 0;
835
836 if (!*s || *s == '\\')
837 leprintf("bad delimiter");
838
839 p = s + strlen(s);
840 s += echarntorune(&delim, s, p - s);
841 c->u.s.delim = delim;
842
843 echarntorune(&r, s, p - s);
844 if (r == delim) /* empty regex */
845 lastre = 1;
846
847 p = find_delim(s, delim, 1);
848 if (!*p)
849 leprintf("missing second delimiter");
850 p -= escapes(s, p, delim, 0);
851 *p = '\0';
852
853 if (lastre) {
854 c->u.s.re = NULL;
855 } else {
856 c->u.s.re = emalloc(sizeof(*c->u.s.re));
857 /* FIXME: different eregcomp that calls fatal */
858 eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED : 0);
859 }
860 s = p + runelen(delim);
861 }
862
863 /* Replace */
864 delim = c->u.s.delim;
865
866 p = find_delim(s, delim, 0);
867 p -= escapes(s, p, delim, 0);
868 if (!*p) { /* no third delimiter */
869 /* FIXME: same backslash counting as aci_append() */
870 if (p[-1] != '\\')
871 leprintf("missing third delimiter or <backslash><newline>");
872 p[-1] = '\n';
873 gflags.s_cont = 1;
874 } else {
875 gflags.s_cont = 0;
876 }
877
878 /* check for bad references in replacement text */
879 *p = '\0';
880 for (esc = 0, p = s; *p; p++) {
881 if (esc) {
882 esc = 0;
883 if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
884 leprintf("back reference number greater than number of groups");
885 } else if (*p == '\\') {
886 esc = 1;
887 }
888 }
889 stracat(&c->u.s.repl, s);
890
891 if (gflags.s_cont)
892 return p;
893
894 s = p + runelen(delim);
895
896 /* Flags */
897 p = semicolon_arg(s = chomp(s));
898
899 /* FIXME: currently for simplicity take last of g or occurrence flags and
900 * ignore multiple p flags. need to fix that */
901 for (; s < p; s++) {
902 if (isdigit(*s)) {
903 c->u.s.occurrence = stol(s, &s);
904 s--; /* for loop will advance pointer */
905 } else {
906 switch (*s) {
907 case 'g': c->u.s.occurrence = 0; break;
908 case 'p': c->u.s.p = 1; break;
909 case 'w':
910 /* must be last flag, take everything up to newline/semicolon
911 * s == p after this */
912 s = get_w_arg(&buf, chomp(s+1));
913 c->u.s.file = buf.u.file;
914 break;
915 }
916 }
917 }
918 return p;
919 }
920
921 static void
922 free_s_arg(Cmd *c)
923 {
924 if (c->u.s.re)
925 regfree(c->u.s.re);
926 free(c->u.s.re);
927 free(c->u.s.repl.str);
928 }
929
930 /* see get_r_arg notes */
931 static char *
932 get_w_arg(Cmd *c, char *s)
933 {
934 char *p = semicolon_arg(s = chomp(s));
935 Wfile *w, **wp;
936
937 if (p == s)
938 leprintf("no file name");
939
940 for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.data) < wfiles.size; wp++) {
941 if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s, (*wp)->path, p - s)) {
942 c->u.file = (*wp)->file;
943 return p;
944 }
945 }
946
947 w = emalloc(sizeof(*w));
948 w->path = estrndup(s, p - s);
949
950 if (!(w->file = fopen(w->path, "w")))
951 leprintf("fopen failed");
952
953 c->u.file = w->file;
954
955 push(&wfiles, w);
956 return p;
957 }
958
959 static char *
960 get_y_arg(Cmd *c, char *s)
961 {
962 Rune delim;
963 char *p = s + strlen(s);
964 size_t rlen = echarntorune(&delim, s, p - s);
965 size_t nrunes1, nrunes2;
966
967 c->u.y.set1 = c->u.y.set2 = NULL;
968
969 s += rlen;
970 p = find_delim(s, delim, 0);
971 p -= escapes(s, p, delim, 1);
972 nrunes1 = utfnlen(s, p - s);
973 c->u.y.set1 = strtorunes(s, nrunes1);
974
975 s = p + rlen;
976 p = find_delim(s, delim, 0);
977 p -= escapes(s, p, delim, 1);
978 nrunes2 = utfnlen(s, p - s);
979
980 if (nrunes1 != nrunes2)
981 leprintf("different set lengths");
982
983 c->u.y.set2 = strtorunes(s, utfnlen(s, p - s));
984
985 return p + rlen;
986 }
987
988 static void
989 free_y_arg(Cmd *c)
990 {
991 free(c->u.y.set1);
992 free(c->u.y.set2);
993 }
994
995 /* see get_bt_arg notes */
996 static char *
997 get_colon_arg(Cmd *c, char *s)
998 {
999 char *p = semicolon_arg(s = chomp(s));
1000
1001 if (p == s)
1002 leprintf("no label name");
1003
1004 c->u.label = estrndup(s, p - s);
1005 push(&labels, (void *)(c - prog));
1006 return p;
1007 }
1008
1009 static char *
1010 get_lbrace_arg(Cmd *c, char *s)
1011 {
1012 push(&braces, (void *)(c - prog));
1013 return s;
1014 }
1015
1016 static char *
1017 get_rbrace_arg(Cmd *c, char *s)
1018 {
1019 Cmd *lbrace;
1020
1021 if (!braces.size)
1022 leprintf("extra }");
1023
1024 lbrace = prog + (ptrdiff_t)pop(&braces);
1025 lbrace->u.offset = c - prog;
1026 return s;
1027 }
1028
/* s points to beginning of an argument that may be semicolon terminated
 * return pointer to semicolon or nul byte after string
 * or closing brace as to not force ; before }
 * FIXME: decide whether or not to eat trailing whitespace for arguments that
 * we allow semicolon/brace termination that POSIX doesn't
 * b, r, t, w, :
 * POSIX says trailing whitespace is part of label name, file name, etc.
 * we should probably eat it
 */
static char *
semicolon_arg(char *s)
{
	/* strcspn stops at the first ; or }, or at the nul byte if there is none */
	return s + strcspn(s, ";}");
}
1046
1047 static void
1048 run(void)
1049 {
1050 lineno = 0;
1051 if (braces.size)
1052 leprintf("extra {");
1053
1054 /* genbuf has already been initialized, patt will be in new_line
1055 * (or we'll halt) */
1056 stracpy(&hold, "");
1057
1058 insert_labels();
1059 next_file();
1060 new_line();
1061
1062 for (pc = prog; !gflags.halt; pc++)
1063 pc->fninfo->fn(pc);
1064 }
1065
1066 /* return true if we are in range for c, set c->in_match appropriately */
1067 static int
1068 in_range(Cmd *c)
1069 {
1070 if (match_addr(&c->range.beg)) {
1071 if (c->range.naddr == 2) {
1072 if (c->range.end.type == LINE && c->range.end.u.lineno <= lineno)
1073 c->in_match = 0;
1074 else
1075 c->in_match = 1;
1076 }
1077 return !c->negate;
1078 }
1079 if (c->in_match && match_addr(&c->range.end)) {
1080 c->in_match = 0;
1081 return !c->negate;
1082 }
1083 return c->in_match ^ c->negate;
1084 }
1085
1086 /* return true if addr matches current line */
1087 static int
1088 match_addr(Addr *a)
1089 {
1090 switch (a->type) {
1091 default:
1092 case IGNORE: return 0;
1093 case EVERY: return 1;
1094 case LINE: return lineno == a->u.lineno;
1095 case LAST:
1096 while (is_eof(file) && !next_file())
1097 ;
1098 return !file;
1099 case REGEX:
1100 lastre = a->u.re;
1101 return !regexec(a->u.re, patt.str, 0, NULL, 0);
1102 case LASTRE:
1103 if (!lastre)
1104 leprintf("no previous regex");
1105 return !regexec(lastre, patt.str, 0, NULL, 0);
1106 }
1107 }
1108
1109 /* move to next input file
1110 * stdin if first call and no files
1111 * return 0 for success and 1 for no more files
1112 */
1113 static int
1114 next_file(void)
1115 {
1116 static unsigned char first = 1;
1117
1118 if (file == stdin)
1119 clearerr(file);
1120 else if (file)
1121 fshut(file, "<file>");
1122 /* given no files, default to stdin */
1123 file = first && !*files ? stdin : NULL;
1124 first = 0;
1125
1126 while (!file && *files) {
1127 if (!strcmp(*files, "-")) {
1128 file = stdin;
1129 } else if (!(file = fopen(*files, "r"))) {
1130 /* warn this file didn't open, but move on to next */
1131 weprintf("fopen %s:", *files);
1132 ret = 1;
1133 }
1134 files++;
1135 }
1136
1137 return !file;
1138 }
1139
/* return non-zero if nothing more can be read from f
 * a NULL stream counts as EOF; otherwise one byte is read and pushed back
 */
static int
is_eof(FILE *f)
{
	int ch;

	if (f == NULL || feof(f))
		return 1;

	ch = fgetc(f);
	if (ch == EOF) {
		/* tell a read error apart from a plain end of file */
		if (ferror(f))
			eprintf("fgetc:");
		return 1;
	}

	/* not at EOF: put the byte back so the next reader sees it */
	if (ungetc(ch, f) == EOF)
		eprintf("ungetc EOF\n");

	return 0;
}
1157
1158 /* perform writes that were scheduled
1159 * for aci this is check_puts(string, stdout)
1160 * for r this is write_file(path, stdout)
1161 */
1162 static void
1163 do_writes(void)
1164 {
1165 Cmd *c;
1166 size_t i;
1167
1168 for (i = 0; i < writes.size; i++) {
1169 c = writes.data[i];
1170 c->u.acir.print(c->u.acir.str.str, stdout);
1171 }
1172 writes.size = 0;
1173 }
1174
1175 /* used for r's u.acir.print()
1176 * FIXME: something like util's concat() would be better
1177 */
1178 static void
1179 write_file(char *path, FILE *out)
1180 {
1181 FILE *in = fopen(path, "r");
1182 if (!in) /* no file is treated as empty file */
1183 return;
1184
1185 while (read_line(in, &genbuf) != EOF)
1186 check_puts(genbuf.str, out);
1187
1188 fshut(in, path);
1189 }
1190
/* write s (skipped when NULL) followed by a newline to f
 * a failed fputs() is reported through eprintf()
 */
static void
check_puts(char *s, FILE *f)
{
	const char *pieces[2];
	size_t i;

	pieces[0] = s;
	pieces[1] = "\n";

	for (i = 0; i < 2; i++) {
		if (!pieces[i])
			continue;
		if (fputs(pieces[i], f) == EOF)
			eprintf("fputs:");
	}
}
1199
1200 /* iterate from beg to end updating ranges so we don't miss any commands
1201 * e.g. sed -n '1d;1,3p' should still print lines 2 and 3
1202 */
1203 static void
1204 update_ranges(Cmd *beg, Cmd *end)
1205 {
1206 while (beg < end)
1207 in_range(beg++);
1208 }
1209
1210 /*
1211 * Sed functions
1212 */
1213 static void
1214 cmd_a(Cmd *c)
1215 {
1216 if (in_range(c))
1217 push(&writes, c);
1218 }
1219
1220 static void
1221 cmd_b(Cmd *c)
1222 {
1223 if (!in_range(c))
1224 return;
1225
1226 /* if we jump backwards update to end, otherwise update to destination */
1227 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
1228 pc = c->u.jump;
1229 }
1230
1231 static void
1232 cmd_c(Cmd *c)
1233 {
1234 if (!in_range(c))
1235 return;
1236
1237 /* write the text on the last line of the match */
1238 if (!c->in_match)
1239 check_puts(c->u.acir.str.str, stdout);
1240 /* otherwise start the next cycle without printing pattern space
1241 * effectively deleting the text */
1242 new_next();
1243 }
1244
1245 static void
1246 cmd_d(Cmd *c)
1247 {
1248 if (!in_range(c))
1249 return;
1250
1251 new_next();
1252 }
1253
1254 static void
1255 cmd_D(Cmd *c)
1256 {
1257 char *p;
1258
1259 if (!in_range(c))
1260 return;
1261
1262 if ((p = strchr(patt.str, '\n'))) {
1263 p++;
1264 memmove(patt.str, p, strlen(p) + 1);
1265 old_next();
1266 } else {
1267 new_next();
1268 }
1269 }
1270
1271 static void
1272 cmd_g(Cmd *c)
1273 {
1274 if (in_range(c))
1275 stracpy(&patt, hold.str);
1276 }
1277
1278 static void
1279 cmd_G(Cmd *c)
1280 {
1281 if (!in_range(c))
1282 return;
1283
1284 stracat(&patt, "\n");
1285 stracat(&patt, hold.str);
1286 }
1287
1288 static void
1289 cmd_h(Cmd *c)
1290 {
1291 if (in_range(c))
1292 stracpy(&hold, patt.str);
1293 }
1294
1295 static void
1296 cmd_H(Cmd *c)
1297 {
1298 if (!in_range(c))
1299 return;
1300
1301 stracat(&hold, "\n");
1302 stracat(&hold, patt.str);
1303 }
1304
1305 static void
1306 cmd_i(Cmd *c)
1307 {
1308 if (in_range(c))
1309 check_puts(c->u.acir.str.str, stdout);
1310 }
1311
1312 /* I think it makes sense to print invalid UTF-8 sequences in octal to satisfy
1313 * the "visually unambiguous form" sed(1p)
1314 */
1315 static void
1316 cmd_l(Cmd *c)
1317 {
1318 Rune r;
1319 char *p, *end;
1320 size_t rlen;
1321
1322 char *escapes[] = { /* FIXME: 7 entries and search instead of 127 */
1323 ['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b",
1324 ['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t",
1325 ['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */
1326 };
1327
1328 if (!in_range(c))
1329 return;
1330
1331 /* FIXME: line wrapping. sed(1p) says "length at which folding occurs is
1332 * unspecified, but should be appropraite for the output device"
1333 * just wrap at 80 Runes?
1334 */
1335 for (p = patt.str, end = p + strlen(p); p < end; p += rlen) {
1336 if (isascii(*p) && escapes[(unsigned int)*p]) {
1337 fputs(escapes[(unsigned int)*p], stdout);
1338 rlen = 1;
1339 } else if (!(rlen = charntorune(&r, p, end - p))) {
1340 /* ran out of chars, print the bytes of the short sequence */
1341 for (; p < end; p++)
1342 printf("\\%03hho", (unsigned char)*p);
1343 break;
1344 } else if (r == Runeerror) {
1345 for (; rlen; rlen--, p++)
1346 printf("\\%03hho", (unsigned char)*p);
1347 } else {
1348 while (fwrite(p, rlen, 1, stdout) < 1 && errno == EINTR)
1349 ;
1350 if (ferror(stdout))
1351 eprintf("fwrite:");
1352 }
1353 }
1354 check_puts("$", stdout);
1355 }
1356
1357 static void
1358 cmd_n(Cmd *c)
1359 {
1360 if (!in_range(c))
1361 return;
1362
1363 if (!gflags.n)
1364 check_puts(patt.str, stdout);
1365 do_writes();
1366 new_line();
1367 }
1368
1369 static void
1370 cmd_N(Cmd *c)
1371 {
1372 if (!in_range(c))
1373 return;
1374 do_writes();
1375 app_line();
1376 }
1377
1378 static void
1379 cmd_p(Cmd *c)
1380 {
1381 if (in_range(c))
1382 check_puts(patt.str, stdout);
1383 }
1384
1385 static void
1386 cmd_P(Cmd *c)
1387 {
1388 char *p;
1389
1390 if (!in_range(c))
1391 return;
1392
1393 if ((p = strchr(patt.str, '\n')))
1394 *p = '\0';
1395
1396 check_puts(patt.str, stdout);
1397
1398 if (p)
1399 *p = '\n';
1400 }
1401
1402 static void
1403 cmd_q(Cmd *c)
1404 {
1405 if (!in_range(c))
1406 return;
1407
1408 if (!gflags.n)
1409 check_puts(patt.str, stdout);
1410 do_writes();
1411 gflags.halt = 1;
1412 }
1413
1414 static void
1415 cmd_r(Cmd *c)
1416 {
1417 if (in_range(c))
1418 push(&writes, c);
1419 }
1420
1421 static void
1422 cmd_s(Cmd *c)
1423 {
1424 String tmp;
1425 Rune r;
1426 size_t plen, rlen, len;
1427 char *p, *s, *end;
1428 unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0;
1429 regex_t *re;
1430 regmatch_t *rm, *pmatch = NULL;
1431
1432 if (!in_range(c))
1433 return;
1434
1435 if (!c->u.s.re && !lastre)
1436 leprintf("no previous regex");
1437
1438 re = c->u.s.re ? c->u.s.re : lastre;
1439 lastre = re;
1440
1441 plen = re->re_nsub + 1;
1442 pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t));
1443
1444 *genbuf.str = '\0';
1445 s = patt.str;
1446
1447 while (!qflag && !regexec(re, s, plen, pmatch, cflags)) {
1448 cflags = REG_NOTBOL; /* match against beginning of line first time, but not again */
1449 if (!*s) /* match against empty string first time, but not again */
1450 qflag = 1;
1451
1452 /* don't substitute if last match was not empty but this one is.
1453 * s_a*_._g
1454 * foobar -> .f.o.o.b.r.
1455 */
1456 if ((last_empty || pmatch[0].rm_eo) &&
1457 (++matches == c->u.s.occurrence || !c->u.s.occurrence)) {
1458 /* copy over everything before the match */
1459 strnacat(&genbuf, s, pmatch[0].rm_so);
1460
1461 /* copy over replacement text, taking into account &, backreferences, and \ escapes */
1462 for (p = c->u.s.repl.str, len = strcspn(p, "\\&"); *p; len = strcspn(++p, "\\&")) {
1463 strnacat(&genbuf, p, len);
1464 p += len;
1465 switch (*p) {
1466 default: leprintf("this shouldn't be possible");
1467 case '\0':
1468 /* we're at the end, back up one so the ++p will put us on
1469 * the null byte to break out of the loop */
1470 --p;
1471 break;
1472 case '&':
1473 strnacat(&genbuf, s + pmatch[0].rm_so, pmatch[0].rm_eo - pmatch[0].rm_so);
1474 break;
1475 case '\\':
1476 if (isdigit(*++p)) { /* backreference */
1477 /* only need to check here if using lastre, otherwise we checked when building */
1478 if (!c->u.s.re && (size_t)(*p - '0') > re->re_nsub)
1479 leprintf("back reference number greater than number of groups");
1480 rm = &pmatch[*p - '0'];
1481 strnacat(&genbuf, s + rm->rm_so, rm->rm_eo - rm->rm_so);
1482 } else { /* character after backslash taken literally (well one byte, but it works) */
1483 strnacat(&genbuf, p, 1);
1484 }
1485 break;
1486 }
1487 }
1488 } else {
1489 /* not replacing, copy over everything up to and including the match */
1490 strnacat(&genbuf, s, pmatch[0].rm_eo);
1491 }
1492
1493 if (!pmatch[0].rm_eo) { /* empty match, advance one rune and add it to output */
1494 end = s + strlen(s);
1495 rlen = charntorune(&r, s, end - s);
1496
1497 if (!rlen) { /* ran out of bytes, copy short sequence */
1498 stracat(&genbuf, s);
1499 s = end;
1500 } else { /* copy whether or not it's a good rune */
1501 strnacat(&genbuf, s, rlen);
1502 s += rlen;
1503 }
1504 }
1505 last_empty = !pmatch[0].rm_eo;
1506 s += pmatch[0].rm_eo;
1507 }
1508 free(pmatch);
1509
1510 if (!(matches && matches >= c->u.s.occurrence)) /* no replacement */
1511 return;
1512
1513 gflags.s = 1;
1514
1515 stracat(&genbuf, s);
1516
1517 tmp = patt;
1518 patt = genbuf;
1519 genbuf = tmp;
1520
1521 if (c->u.s.p)
1522 check_puts(patt.str, stdout);
1523 if (c->u.s.file)
1524 check_puts(patt.str, c->u.s.file);
1525 }
1526
1527 static void
1528 cmd_t(Cmd *c)
1529 {
1530 if (!in_range(c) || !gflags.s)
1531 return;
1532
1533 /* if we jump backwards update to end, otherwise update to destination */
1534 update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
1535 pc = c->u.jump;
1536 gflags.s = 0;
1537 }
1538
1539 static void
1540 cmd_w(Cmd *c)
1541 {
1542 if (in_range(c))
1543 check_puts(patt.str, c->u.file);
1544 }
1545
1546 static void
1547 cmd_x(Cmd *c)
1548 {
1549 String tmp;
1550
1551 if (!in_range(c))
1552 return;
1553
1554 tmp = patt;
1555 patt = hold;
1556 hold = tmp;
1557 }
1558
1559 static void
1560 cmd_y(Cmd *c)
1561 {
1562 String tmp;
1563 Rune r, *rp;
1564 size_t n, rlen;
1565 char *s, *end, buf[UTFmax];
1566
1567 if (!in_range(c))
1568 return;
1569
1570 *genbuf.str = '\0';
1571 for (s = patt.str, end = s + strlen(s); *s; s += rlen) {
1572 if (!(rlen = charntorune(&r, s, end - s))) { /* ran out of chars, copy rest */
1573 stracat(&genbuf, s);
1574 break;
1575 } else if (r == Runeerror) { /* bad UTF-8 sequence, copy bytes */
1576 strnacat(&genbuf, s, rlen);
1577 } else {
1578 for (rp = c->u.y.set1; *rp; rp++)
1579 if (*rp == r)
1580 break;
1581 if (*rp) { /* found r in set1, replace with Rune from set2 */
1582 n = runetochar(buf, c->u.y.set2 + (rp - c->u.y.set1));
1583 strnacat(&genbuf, buf, n);
1584 } else {
1585 strnacat(&genbuf, s, rlen);
1586 }
1587 }
1588 }
1589 tmp = patt;
1590 patt = genbuf;
1591 genbuf = tmp;
1592 }
1593
1594 static void
1595 cmd_colon(Cmd *c)
1596 {
1597 }
1598
1599 static void
1600 cmd_equal(Cmd *c)
1601 {
1602 if (in_range(c))
1603 printf("%zu\n", lineno);
1604 }
1605
1606 static void
1607 cmd_lbrace(Cmd *c)
1608 {
1609 Cmd *jump;
1610
1611 if (in_range(c))
1612 return;
1613
1614 /* update ranges on all commands we skip */
1615 jump = prog + c->u.offset;
1616 update_ranges(c + 1, jump);
1617 pc = jump;
1618 }
1619
1620 static void
1621 cmd_rbrace(Cmd *c)
1622 {
1623 }
1624
1625 /* not actually a sed function, but acts like one, put in last spot of script */
1626 static void
1627 cmd_last(Cmd *c)
1628 {
1629 if (!gflags.n)
1630 check_puts(patt.str, stdout);
1631 do_writes();
1632 new_next();
1633 }
1634
1635 /*
1636 * Actions
1637 */
1638
1639 /* read new line, continue current cycle */
1640 static void
1641 new_line(void)
1642 {
1643 while (read_line(file, &patt) == EOF) {
1644 if (next_file()) {
1645 gflags.halt = 1;
1646 return;
1647 }
1648 }
1649 gflags.s = 0;
1650 lineno++;
1651 }
1652
1653 /* append new line, continue current cycle
1654 * FIXME: used for N, POSIX specifies do not print pattern space when out of
1655 * input, but GNU does so busybox does as well. Currently we don't.
1656 * Should we?
1657 */
1658 static void
1659 app_line(void)
1660 {
1661 while (read_line(file, &genbuf) == EOF) {
1662 if (next_file()) {
1663 gflags.halt = 1;
1664 return;
1665 }
1666 }
1667
1668 stracat(&patt, "\n");
1669 stracat(&patt, genbuf.str);
1670 gflags.s = 0;
1671 lineno++;
1672 }
1673
1674 /* read new line, start new cycle */
1675 static void
1676 new_next(void)
1677 {
1678 *patt.str = '\0';
1679 update_ranges(pc + 1, prog + pcap);
1680 new_line();
1681 pc = prog - 1;
1682 }
1683
1684 /* keep old pattern space, start new cycle */
1685 static void
1686 old_next(void)
1687 {
1688 update_ranges(pc + 1, prog + pcap);
1689 pc = prog - 1;
1690 }
1691
1692 int
1693 main(int argc, char *argv[])
1694 {
1695 char *arg;
1696 int script = 0;
1697
1698 ARGBEGIN {
1699 case 'n':
1700 gflags.n = 1;
1701 break;
1702 case 'r':
1703 case 'E':
1704 gflags.E = 1;
1705 break;
1706 case 'e':
1707 arg = EARGF(usage());
1708 compile(arg, 0);
1709 script = 1;
1710 break;
1711 case 'f':
1712 arg = EARGF(usage());
1713 compile(arg, 1);
1714 script = 1;
1715 break;
1716 default : usage();
1717 } ARGEND
1718
1719 /* no script to run */
1720 if (!script && !argc)
1721 usage();
1722
1723 /* no script yet, next argument is script */
1724 if (!script)
1725 compile(*argv++, 0);
1726
1727 /* shrink/grow memory to fit and add our last instruction */
1728 resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL);
1729 pc = prog + pcap - 1;
1730 pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 };
1731
1732 files = argv;
1733 run();
1734
1735 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
1736
1737 return ret;
1738 }