/* $NetBSD: gnum4.c,v 1.11.2.2 2023/05/28 10:07:11 martin Exp $ */ /* $OpenBSD: gnum4.c,v 1.39 2008/08/21 21:01:04 espie Exp $ */ /* * Copyright (c) 1999 Marc Espie * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * functions needed to support gnu-m4 extensions, including a fake freezing */ #if HAVE_NBTOOL_CONFIG_H #include "nbtool_config.h" #endif #include __RCSID("$NetBSD: gnum4.c,v 1.11.2.2 2023/05/28 10:07:11 martin Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mdef.h" #include "stdd.h" #include "extern.h" int mimic_gnu = 0; #ifndef SIZE_T_MAX #define SIZE_T_MAX (size_t)~0ull #endif /* * Support for include path search * First search in the current directory. * If not found, and the path is not absolute, include path kicks in. * First, -I options, in the order found on the command line. * Then M4PATH env variable */ struct path_entry { char *name; struct path_entry *next; } *first, *last; static struct path_entry *new_path_entry(const char *); static void ensure_m4path(void); static struct input_file *dopath(struct input_file *, const char *); static struct path_entry * new_path_entry(const char *dirname) { struct path_entry *n; n = malloc(sizeof(struct path_entry)); if (!n) errx(1, "out of memory"); n->name = strdup(dirname); if (!n->name) errx(1, "out of memory"); n->next = 0; return n; } void addtoincludepath(const char *dirname) { struct path_entry *n; n = new_path_entry(dirname); if (last) { last->next = n; last = n; } else last = first = n; } static void ensure_m4path(void) { static int envpathdone = 0; char *envpath; char *sweep; char *path; if (envpathdone) return; envpathdone = TRUE; envpath = getenv("M4PATH"); if (!envpath) return; /* for portability: getenv result is read-only */ envpath = strdup(envpath); if (!envpath) errx(1, "out of memory"); for (sweep = envpath; (path = strsep(&sweep, ":")) != NULL;) addtoincludepath(path); free(envpath); } static struct input_file * dopath(struct input_file *i, const char *filename) { char path[MAXPATHLEN]; struct path_entry *pe; FILE *f; for (pe = first; pe; pe = pe->next) { snprintf(path, sizeof(path), "%s/%s", pe->name, filename); if ((f = fopen(path, "r")) != 0) { set_input(i, f, path); return i; } } return NULL; } struct input_file * fopen_trypath(struct input_file *i, const char *filename) { FILE *f; f = fopen(filename, "r"); if (f != NULL) { set_input(i, f, filename); return i; } if (filename[0] == '/') return NULL; ensure_m4path(); return dopath(i, filename); } void doindir(const char *argv[], int argc) { ndptr n; struct macro_definition *p; n = lookup(argv[2]); if (n == NULL || (p = macro_getdef(n)) == NULL) m4errx(1, "indir: undefined macro %s.", argv[2]); argv[1] = p->defn; eval(argv+1, argc-1, p->type, is_traced(n)); } void dobuiltin(const char *argv[], int argc) { ndptr p; argv[1] = NULL; p = macro_getbuiltin(argv[2]); if (p != NULL) eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); else m4errx(1, "unknown builtin %s.", argv[2]); } /* We need some temporary buffer space, as pb pushes BACK and substitution * proceeds forward... */ static char *buffer; static size_t bufsize = 0; static size_t current = 0; static void addchars(const char *, size_t); static void addchar(int); static char *twiddle(const char *); static char *getstring(void); static void exit_regerror(int, const char *, regex_t *) __dead; static void do_subst(const char *, const char *, regex_t *, const char *, regmatch_t *); static void do_regexpindex(const char *, const char *, regex_t *, regmatch_t *); static void do_regexp(const char *, const char *, regex_t *, const char *, regmatch_t *); static void add_sub(size_t, const char *, regex_t *, regmatch_t *); static void add_replace(const char *, regex_t *, const char *, regmatch_t *); #define addconstantstring(s) addchars((s), sizeof(s)-1) static void addchars(const char *c, size_t n) { if (n == 0) return; while (current + n > bufsize) { if (bufsize == 0) bufsize = 1024; else bufsize *= 2; buffer = xrealloc(buffer, bufsize, NULL); } memcpy(buffer+current, c, n); current += n; } static void addchar(int c) { if (current +1 > bufsize) { if (bufsize == 0) bufsize = 1024; else bufsize *= 2; buffer = xrealloc(buffer, bufsize, NULL); } buffer[current++] = c; } static char * getstring(void) { addchar('\0'); current = 0; return buffer; } static void exit_regerror(int er, const char *pat, regex_t *re) { size_t errlen; char *errbuf; errlen = regerror(er, re, NULL, 0); errbuf = xalloc(errlen, "malloc in regerror: %lu", (unsigned long)errlen); regerror(er, re, errbuf, errlen); m4errx(1, "regular expression error: %s for: `%s'", errbuf, pat); } static void add_sub(size_t n, const char *string, regex_t *re, regmatch_t *pm) { if (n > re->re_nsub) { if (!quiet) warnx("No subexpression %zu", n); if (fatal_warnings) exit(EXIT_FAILURE); } /* Subexpressions that did not match are * not an error. */ else if (pm[n].rm_so != -1 && pm[n].rm_eo != -1) { addchars(string + pm[n].rm_so, pm[n].rm_eo - pm[n].rm_so); } } /* Add replacement string to the output buffer, recognizing special * constructs and replacing them with substrings of the original string. */ static void add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) { const char *p; for (p = replace; *p != '\0'; p++) { if (*p == '&' && !mimic_gnu) { add_sub(0, string, re, pm); continue; } if (*p == '\\') { if (p[1] == '\\') { addchar(p[1]); p++; continue; } if (p[1] == '&') { if (mimic_gnu) add_sub(0, string, re, pm); else addchar(p[1]); p++; continue; } if (isdigit((unsigned char)p[1])) { add_sub(*(++p) - '0', string, re, pm); continue; } } addchar(*p); } } static void do_subst(const char *pat, const char *string, regex_t *re, const char *replace, regmatch_t *pm) { int error; int flags = 0; const char *last_match = NULL; while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { if (pm[0].rm_eo != 0) { if (string[pm[0].rm_eo-1] == '\n') flags = 0; else flags = REG_NOTBOL; } /* NULL length matches are special... We use the `vi-mode' * rule: don't allow a NULL-match at the last match * position. */ if (pm[0].rm_so == pm[0].rm_eo && string + pm[0].rm_so == last_match) { if (*string == '\0') return; addchar(*string); if (*string++ == '\n') flags = 0; else flags = REG_NOTBOL; continue; } last_match = string + pm[0].rm_so; addchars(string, pm[0].rm_so); add_replace(string, re, replace, pm); string += pm[0].rm_eo; buffer[current] = '\0'; } while (*string) addchar(*string++); if (error != REG_NOMATCH) exit_regerror(error, pat, re); pbstr(string); } static void do_regexp(const char *pat, const char *string, regex_t *re, const char *replace, regmatch_t *pm) { int error; switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { case 0: add_replace(string, re, replace, pm); pbstr(getstring()); break; case REG_NOMATCH: break; default: exit_regerror(error, pat, re); } } static void do_regexpindex(const char *pat, const char *string, regex_t *re, regmatch_t *pm) { int error; switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { case 0: pbunsigned(pm[0].rm_so); break; case REG_NOMATCH: pbnum(-1); break; default: exit_regerror(error, pat, re); } } /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 * says. So we twiddle with the regexp before passing it to regcomp. */ static char * twiddle(const char *p) { /* + at start of regexp is a normal character for Gnu m4 */ if (*p == '^') { addchar(*p); p++; } if (*p == '+') { addchar('\\'); } /* This could use strcspn for speed... */ while (*p != '\0') { if (*p == '\\') { switch(p[1]) { case '(': case ')': case '|': addchar(p[1]); break; case 'w': addconstantstring("[_a-zA-Z0-9]"); break; case 'W': addconstantstring("[^_a-zA-Z0-9]"); break; case '<': addconstantstring("[[:<:]]"); break; case '>': addconstantstring("[[:>:]]"); break; default: addchars(p, 2); break; } p+=2; continue; } if (strchr("()|{}", *p) != NULL) addchar('\\'); addchar(*p); p++; } return getstring(); } static int checkempty(const char *argv[], int argc) { const char *s; size_t len; if (argc != 3 && argv[3][0] != '\0') return 0; if (argc == 3) { if (!quiet) warnx("Too few arguments to patsubst"); if (fatal_warnings) exit(EXIT_FAILURE); } if (argv[4] && argc > 4) len = strlen(argv[4]); else len = 0; for (s = argv[2]; *s != '\0'; s++) { addchars(argv[4], len); addchar(*s); } return 1; } /* patsubst(string, regexp, opt replacement) */ /* argv[2]: string * argv[3]: regexp * argv[4]: opt rep */ void dopatsubst(const char *argv[], int argc) { if (argc < 3) { if (!quiet) warnx("Too few arguments to patsubst"); if (fatal_warnings) exit(EXIT_FAILURE); return; } /* special case: empty regexp */ if (!checkempty(argv, argc)) { const char *pat; int error; regex_t re; regmatch_t *pmatch; int mode = REG_EXTENDED; size_t l = strlen(argv[3]); if (!mimic_gnu || (argv[3][0] == '^') || (l > 0 && argv[3][l-1] == '$')) mode |= REG_NEWLINE; pat = mimic_gnu ? twiddle(argv[3]) : argv[3]; error = regcomp(&re, pat, mode); if (error != 0) exit_regerror(error, pat, &re); pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); do_subst(pat, argv[2], &re, argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); free(pmatch); regfree(&re); } pbstr(getstring()); } void doregexp(const char *argv[], int argc) { int error; regex_t re; regmatch_t *pmatch; const char *pat; if (argc < 3) { if (!quiet) warnx("Too few arguments to regexp"); if (fatal_warnings) exit(EXIT_FAILURE); return; } if (checkempty(argv, argc)) { return; } pat = mimic_gnu ? twiddle(argv[3]) : argv[3]; error = regcomp(&re, pat, REG_EXTENDED); if (error != 0) exit_regerror(error, pat, &re); pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); if (argv[4] == NULL || argc == 4) do_regexpindex(pat, argv[2], &re, pmatch); else do_regexp(pat, argv[2], &re, argv[4], pmatch); free(pmatch); regfree(&re); } void doformat(const char *argv[], int argc) { const char *format = argv[2]; int pos = 3; int left_padded; long width; size_t l; const char *thisarg; char temp[2]; size_t extra; while (*format != 0) { if (*format != '%') { addchar(*format++); continue; } format++; if (*format == '%') { addchar(*format++); continue; } if (*format == 0) { addchar('%'); break; } if (*format == '*') { format++; if (pos >= argc) m4errx(1, "Format with too many format specifiers."); width = strtol(argv[pos++], NULL, 10); } else { char *eformat; width = strtol(format, &eformat, 10); format = eformat; } if (width < 0) { left_padded = 1; width = -width; } else { left_padded = 0; } if (*format == '.') { format++; if (*format == '*') { format++; if (pos >= argc) m4errx(1, "Format with too many format specifiers."); extra = strtol(argv[pos++], NULL, 10); } else { char *eformat; extra = strtol(format, &eformat, 10); format = eformat; } } else { extra = SIZE_T_MAX; } if (pos >= argc) m4errx(1, "Format with too many format specifiers."); switch(*format) { case 's': thisarg = argv[pos++]; break; case 'c': temp[0] = strtoul(argv[pos++], NULL, 10); temp[1] = 0; thisarg = temp; break; default: m4errx(1, "Unsupported format specification: %s.", argv[2]); } format++; l = strlen(thisarg); if (l > extra) l = extra; if (!left_padded) { while (l < (size_t)width--) addchar(' '); } addchars(thisarg, l); if (left_padded) { while (l < (size_t)width--) addchar(' '); } } pbstr(getstring()); } void doesyscmd(const char *cmd) { int p[2]; pid_t pid, cpid; const char *argv[4]; int cc; int status; /* Follow gnu m4 documentation: first flush buffers. */ fflush(NULL); argv[0] = "sh"; argv[1] = "-c"; argv[2] = cmd; argv[3] = NULL; /* Just set up standard output, share stderr and stdin with m4 */ if (pipe(p) == -1) err(1, "bad pipe"); switch(cpid = fork()) { case -1: err(1, "bad fork"); /* NOTREACHED */ case 0: (void) close(p[0]); (void) dup2(p[1], 1); (void) close(p[1]); execv(_PATH_BSHELL, __UNCONST(argv)); exit(1); default: /* Read result in two stages, since m4's buffer is * pushback-only. */ (void) close(p[1]); do { char result[BUFSIZE]; cc = read(p[0], result, sizeof result); if (cc > 0) addchars(result, cc); } while (cc > 0 || (cc == -1 && errno == EINTR)); (void) close(p[0]); while ((pid = wait(&status)) != cpid && pid >= 0) continue; pbstr(getstring()); } } void getdivfile(const char *name) { FILE *f; int c; f = fopen(name, "r"); if (!f) return; while ((c = getc(f))!= EOF) putc(c, active); (void) fclose(f); } #ifdef REAL_FREEZE void freeze_state(const char *fname) { FILE *f; if ((f = fopen(fname, "wb")) == NULL) m4errx(EXIT_FAILURE, "Can't open output freeze file `%s' (%s)", fname, strerror(errno)); fprintf(f, "# This is a frozen state file generated by %s\nV1\n", getprogname()); fprintf(f, "Q%zu,%zu\n%s%s\n", strlen(lquote), strlen(rquote), lquote, rquote); fprintf(f, "C%zu,%zu\n%s%s\n", strlen(scommt), strlen(ecommt), scommt, ecommt); dump_state(f); /* XXX: diversions? */ fprintf(f, "D-1,0\n"); fprintf(f, "# End of frozen state file\n"); fclose(f); } void thaw_state(const char *fname) { char *name = NULL; size_t nl, namelen = 0; char *defn = NULL; size_t dl, defnlen = 0; size_t lineno = 0; char line[1024], *ptr, type; FILE *f; if ((f = fopen(fname, "rb")) == NULL) m4errx(EXIT_FAILURE, "Can't open frozen file `%s' (%s)", fname, strerror(errno)); #define GET() if (fgets(line, (int)sizeof(line), f) == NULL) goto out #define GETSTR(s, l) if (fread(s, 1, l, f) != l) goto out; else s[l] = '\0' GET(); /* comment */ GET(); /* version */ if ((ptr = strrchr(line, '\n')) != NULL) *ptr = '\0'; if (strcmp(line, "V1") != 0) m4errx(EXIT_FAILURE, "Bad frozen version `%s'", line); for (;;) { GET(); lineno++; switch (*line) { case '\n': continue; case '#': free(name); free(defn); fclose(f); return; default: if (sscanf(line, "%c%zu,%zu\n", &type, &nl, &dl) != 3) m4errx(EXIT_FAILURE, "%s, %zu: Bad line `%s'", fname, lineno, line); break; } switch (type) { case 'Q': if (nl >= sizeof(lquote) || dl >= sizeof(rquote)) m4errx(EXIT_FAILURE, "%s, %zu: Quote too long", fname, lineno); GETSTR(lquote, nl); GETSTR(rquote, dl); break; case 'C': if (nl >= sizeof(scommt) || dl >= sizeof(ecommt)) m4errx(EXIT_FAILURE, "%s, %zu: Comment too long", fname, lineno); GETSTR(scommt, nl); GETSTR(ecommt, dl); break; case 'T': case 'F': if (nl >= namelen) name = xrealloc(name, namelen = nl + 1, "name grow"); if (dl >= defnlen) defn = xrealloc(defn, defnlen = dl + 1, "defn grow"); GETSTR(name, nl); GETSTR(defn, dl); macro_pushdef(name, defn); break; case 'D': /* XXX: Not implemented */ break; default: m4errx(EXIT_FAILURE, "%s, %zu: Unknown type %c", fname, lineno,type); } } out: m4errx(EXIT_FAILURE, "Unexpected end of file in `%s'", fname); } #endif