
{ macro3.pas }
{ From:
    "Software Tools in Pascal"
    Brian W. Kernighan, P. J. Plauger
    Addison-Wesley, 1981
    Chapter 8
    1981 by Bell Labs. and Whitesmiths Ltd.
}
{
macro reads its input, looking for macro definitions of the form

 define(ident,string)

and writes its output with each subsequent instance of the identifier ident replaced by the arbitrary sequence of characters string.

Within a replacement string, any dollar sign $ followed by a digit is replaced by an argument corresponding to that digit. Arguments are written as a parenthesized list of strings following an instance of the identifier, e.g.,

ident(arg1,arg2,...)

So $1 is replaced in the replacement string by argt, $2 by arg2, and so on; $0 is replaced by ident. Missing arguments are taken as null strings; extra arguments are ignored. The replacement string in a definition is expanded before the definition occurs, except that any sequence of characters between a grave ` and a balancing apostrophe ' is taken literally, with the grave and apostrophe removed. Thus, it is possible to make an alias for define by writing

define(def,`define($1,$2)')

Additional predefined built-ins are:

ifelse(a,b,c,d) is replaced by the string c if the string a exactly matches the string b; otherwise it is replaced by the string d.

expr(expression) is replaced by the decimal string representation of the numeric value of expression. For correct operation, the expression must consist of parentheses, integer operands written as decimal digit strings, and the operators +, -, as, / (integer division), and % (remainder). Multiplication and division bind tighter than addition and subtraction, but parentheses may be used to alter this order.

substr(s,m,n) is replaced by the substring of s starting at location m (counting from one) and continuing at most n characters. If n is omitted, it is taken as a very large number; if m is outside the string, the replacement string is null. m and n may be expressions suitable for expr.

1en(s) is replaced by the string representing the length of its argument in characters.

changeq(xy) changes the quote characters to x and y. changeq() changes them back to ` and '.

Each replacement string is rescanned for further possible replacements, permitting multi-level definitions to be expanded to final form.

EXAMPLE
-------
The macro len could be written in terms of the other built-ins as:

define(`1en',`ifelse($1,,0.`expr(1+len(substr($1,2)))')')

BUGS
----
A recursive definition of the form define(x,x) will cause an infinite loop.

Expression evaluation is fragile. There is no unary minus.

It is unwise to use parentheses as quote characters.

Some general functions are defined in utils.pas

Data Structures
---------------

    typestk plev  callstk      argstk           evalstk
     +--+   +--+   +--+         +--+    +--+--+--+-----+-----
     |  |   |  |   |  |         |  |    |  |  |  |     |    |
     +--+   +--+   +--+         +--+    +--+--+--+-----+-----
cp-->|  |   |  |   | -|----|    |  |       ^  ^  ^
     +--+   +--+   +--+    |    +--+       |  |  |
     |  |   |  |   |  |    |--->| -|-------|  |  |
     +--+   +--+   +--+     ap  +--+          |  |
     |  |   |  |   |  |         | -|----------|  |
     |  |   |  |   |  |         +--+             |
                                | -|-------------|
                                +--+
                                |  |
}


{ maccons -- const declarations for macro }
const
  BUFSIZE = 1000;   { size of pushback buffer }
  MAXCHARS = 5000;  { size of name-defn table }
  MAXPOS = 500;     { size of position arrays }
  CALLSIZE = MAXPOS;
  ARGSIZE = MAXPOS;
  EVALSIZE = MAXCHARS;
  MAXDEF = MAXSTR;  { max chars in a defn }
  MAXTOK = MAXSTR;  { max chars in a token }
  HASHSIZE = 53;    { size of hash table }
  ARGFLAG = DOLLAR; { macro invocation character }


{ mactype -- type declarations for macro }
type
  charpos = 1..MAXCHARS;
  charbuf = array [1..MAXCHARS] of character;
  posbuf = array [1..MAXPOS] of charpos;
  pos = 0..MAXPOS;
  sttype = (DEFTYPE, MACTYPE, IFTYPE, SUBTYPE,
            EXPRTYPE, LENTYPE, CHQTYPE); { symbol table types }
  ndptr = ^ndblock;
  ndblock =
    record
      name : charpos;
      defn : charpos;
      kind : sttype;
      nextptr : ndptr
    end;


{ macvar -- var declarations for macro }
var
  buf : array [1..BUFSIZE] of character;  { for pushback }
  bp : 0..BUFSIZE;   { next available character; init=0 }

  hashtab : array [1..HASHSIZE] of ndptr;
  ndtable : charbuf;
  nexttab : charpos; { first free position in ndtable }

  callstk : posbuf;  { call stack }
  cp : pos;          { current call stack position }
  typestk : array[1..CALLSIZE] of sttype;  { type }
  plev : array [1..CALLSIZE] of integer;  { paren level }
  argstk : posbuf;   { argument stack for this call }
  ap : pos;          { current argument position }
  evalstk : charbuf; { evaluation stack }
  ep : charpos;      { first character unused in evalstk }

  { built-ins: }
  defname : string;  { value is 'define' }
  exprname : string; { value is 'expr' }
  subname : string;  { value is 'substr' }
  ifname : string;   { value is 'ifelse' }
  lenname : string;  { value is 'len' }
  chqname : string;  { value is 'changeq' }

  null : string;    { value is '' }
  lquote : character;  { left quote character }
  rquote : character;  { right quote character }


{ macro -- expand macros with arguments }
procedure macro;
var
  defn : string;
  token : string;
  toktype : sttype;
  t : character;
  nlpar : integer;
begin
  initmacro;
  install(defname, null, DEFTYPE);
  install(exprname, null, EXPRTYPE);
  install(subname, null, SUBTYPE);
  install(ifname, null, IFTYPE);
  install(lenname, null, LENTYPE);
  install(chqname, null, CHQTYPE);
  cp := 0;
  ap := 1;
  ep := 1;
  while (gettok(token, MAXTOK) <> ENDFILE) do
    if (isletter(token[1])) then begin
      if (not lookup(token, defn, toktype)) then
        puttok(token)
      else begin  { defined; put it in eval stack }
        cp := cp + 1;
        if (cp > CALLSIZE) then
          error('macro: call stack overflow');
        callstk[cp] := ap;
        typestk[cp] := toktype;
        ap := push(ep, argstk, ap);
        puttok(defn);  { push definition }
        putchr(ENDSTR);
        ap := push(ep, argstk, ap);
        puttok(token);  { stack name }
        putchr(ENDSTR);
        ap := push(ep, argstk, ap);
        t := gettok(token, MAXTOK);  { peek at next }
        pbstr(token);
        if (t <> LPAREN) then begin  { add () }
          putback(RPAREN);
          putback(LPAREN)
        end;
        plev[cp] := 0
      end
    end
    else if (token[1] = lquote) then begin  { strip quotes }
      nlpar := 1;
      repeat
        t := gettok(token, MAXTOK);
        if (t = rquote) then
          nlpar := nlpar - 1
        else if (t = lquote) then
          nlpar := nlpar + 1
        else if (t = ENDFILE) then
          error('macro: missing right quote');
        if (nlpar > 0) then
          puttok(token)
      until (nlpar = 0)
    end
    else if (cp = 0) then   { not in a macro at all }
      puttok(token)
    else if (token[1] = LPAREN) then begin
      if (plev[cp] > 0) then
        puttok(token);
      plev[cp] := plev[cp] + 1
    end
    else if (token[1] = RPAREN) then begin
      plev[cp] := plev[cp] - 1;
      if (plev[cp] > 0) then
        puttok(token)
      else begin  { end of argument list }
        putchr(ENDSTR);
        eval(argstk, typestk[cp], callstk[cp], ap-1);
        ap := callstk[cp];  { pop eval stack }
        ep := argstk[ap];
        cp := cp - 1
      end
    end
    else if (token[1]=COMMA) and (plev[cp]=1) then begin
      putchr(ENDSTR);  { new argument }
      ap := push(ep, argstk, ap)
    end
    else
      puttok(token);  { just stack it }
  if (cp <> 0) then
    error('macro: unexpected end of input')
end;



{ install -- add name, definition and type to table }
procedure install (var name, defn : string; t : sttype);
var
  h, dlen, nlen : integer;
  p : ndptr;
begin
  nlen := length(name) + 1;  { 1 for ENDSTR }
  dlen := length(defn) + 1;
  if (nexttab + nlen + dlen > MAXCHARS) then begin
    putstr(name, STDERR);
    error(': too many definitions')
  end
  else begin  { put it at front of chain }
    h := hash(name);
    new(p);
    p^.nextptr := hashtab[h];
    hashtab[h] := p;
    p^.name := nexttab;
    sccopy(name, ndtable, nexttab);
    nexttab := nexttab + nlen;
    p^.defn := nexttab;
    sccopy(defn, ndtable, nexttab);
    nexttab := nexttab + dlen;
    p^.kind := t
  end
end;


{ hash -- compute hash function of a name }
function hash (var name : string) : integer;
var
  i, h : integer;
begin
  h := 0;
  for i := 1 to length(name) do
    h := (3 * h + name[i]) mod HASHSIZE;
  hash := h + 1
end;


{ sccopy -- copy string s to cb[i]... }
procedure sccopy (var s : string; var cb : charbuf;
    i : charpos);
var
  j : integer;
begin
  j := 1;
  while (s[j] <> ENDSTR) do begin
    cb[i] := s[j];
    j := j + 1;
    i := i + 1
  end;
  cb[i] := ENDSTR
end;


{ gettok -- get token for define }
function gettok (var token : string; toksize : integer)
    : character;
var
  i : integer;
  done : boolean;
begin
  i := 1;
  done := false;
  while (not done) and (i < toksize) do
    if (isalphanum(getpbc(token[i]))) then
      i := i + 1
    else
      done := true;
  if (i >= toksize) then
    error('define: token too long');
  if (i > 1) then begin  { some alpha was seen }
    putback(token[i]);
    i := i - 1
  end;
  { else single non-alphanumeric }
  token[i+1] := ENDSTR;
  gettok := token[1]
end;


{ getpbc -- get a (possibly pushed back) character }
function getpbc (var c : character) : character;
begin
  if (bp > 0) then
    c := buf[bp]
  else begin
    bp := 1;
    buf[bp] := getc(c)
  end;
  if (c <> ENDFILE) then
    bp := bp - 1;
  getpbc := c
end;


{ putback -- push character back onto input }
procedure putback (c : character);
begin
  if (bp >= BUFSIZE) then
    error('too many characters pushed back');
  bp := bp + 1;
  buf[bp] := c
end;


{ lookup -- locate name, get defn and type from table }
function lookup (var name, defn : string; var t : sttype)
    : boolean;
var
  p : ndptr;
begin
  p := hashfind(name);
  if (p = nil) then
    lookup := false
  else begin
    lookup := true;
    cscopy(ndtable, p^.defn, defn);
    t := p^.kind
  end
end;


{ hashfind -- find name in hash table }
function hashfind (var name : string) : ndptr;
var
  p : ndptr;
  tempname : string;
  found : boolean;
begin
  found := false;
  p := hashtab[hash(name)];
  while (not found) and (p <> nil) do begin
    cscopy(ndtable, p^.name, tempname);
    if (equal(name, tempname)) then
      found := true
    else
      p := p^.nextptr
  end;
  hashfind := p
end;


{ push -- push ep onto argstk, return new position ap }
function push (ep : integer; var argstk : posbuf;
    ap : integer) : integer;
begin
  if (ap > ARGSIZE) then
    error('macro: argument stack overflow');
  argstk[ap] := ep;
  push := ap + 1
end;


{ pbstr -- push string back onto input }
procedure pbstr (var s : string);
var
  i : integer;
begin
  for i := length(s) downto 1 do
    putback(s[i])
end;


{ putchr -- put single char on output or evaluation stack }
procedure putchr (c : character);
begin
  if (cp <= 0) then
    putc(c)
  else begin
    if (ep > EVALSIZE) then
      error('macro: evaluation stack overflow');
    evalstk[ep] := c;
    ep := ep + 1
  end
end;


{ initmacro -- initialize variables for macro }
procedure initmacro;
begin
  null[1] := ENDSTR;
  { setstring(defname, 'define'); }
    defname[1] := ord('d');
    defname[2] := ord('e');
    defname[3] := ord('f');
    defname[4] := ord('i');
    defname[5] := ord('n');
    defname[6] := ord('e');
    defname[7] := ENDSTR;
  { setstring(subname, 'substr'); }
    subname[1] := ord('s');
    subname[2] := ord('u');
    subname[3] := ord('b');
    subname[4] := ord('s');
    subname[5] := ord('t');
    subname[6] := ord('r');
    subname[7] := ENDSTR;
  { setstring(exprname, 'expr'); }
    exprname[1] := ord('e');
    exprname[2] := ord('x');
    exprname[3] := ord('p');
    exprname[4] := ord('r');
    exprname[5] := ENDSTR;
  { setstring(ifname, 'ifelse'); }
    ifname[1] := ord('i');
    ifname[2] := ord('f');
    ifname[3] := ord('e');
    ifname[4] := ord('l');
    ifname[5] := ord('s');
    ifname[6] := ord('e');
    ifname[7] := ENDSTR;
  { setstring(lenname, 'len'); }
    lenname[1] := ord('l');
    lenname[2] := ord('e');
    lenname[3] := ord('n');
    lenname[4] := ENDSTR;
  { setstring(chqname, 'changeq'); }
    chqname[1] := ord('c');
    chqname[2] := ord('h');
    chqname[3] := ord('a');
    chqname[4] := ord('n');
    chqname[5] := ord('g');
    chqname[6] := ord('e');
    chqname[7] := ord('q');
    chqname[8] := ENDSTR;
  bp := 0;  { pushback buffer pointer }
  inithash;
  lquote := ord(GRAVE);
  rquote := ord(ACUTE)
end;


{ inithash -- initialize hash table to nil }
procedure inithash;
var
  i : 1..HASHSIZE;
begin
  nexttab := 1;  { first free slot in table }
  for i := 1 to HASHSIZE do
    hashtab[i] := nil
end;


{ puttok -- put token on output or evaluation stack }
procedure puttok (var s : string);
var
  i : integer;
begin
  i := 1;
  while (s[i] <> ENDSTR) do begin
    putchr(s[i]);
    i := i + 1
  end
end;


{ eval -- expand args i..j: do built-in or push back defn }
procedure eval (var argstk : posbuf; td : sttype;
    i, j : integer);
var
  argno, k, t : integer;
  temp : string;
begin
  t := argstk[i];
  if (td = DEFTYPE) then
    dodef(argstk, i, j)
  else if (td = EXPRTYPE) then
    doexpr(argstk, i, j)
  else if (td = SUBTYPE) then
    dosub(argstk, i, j)
  else if (td = IFTYPE) then
    doif(argstk, i, j)
  else if (td = LENTYPE) then
    dolen(argstk, i, j)
  else if (td = CHQTYPE) then
    dochq(argstk, i, j)
  else begin
    k := t;
    while (evalstk[k] <> ENDSTR) do
      k := k + 1;
    k := k - 1;  { last character of defn }
    while (k > t) do begin
      if (evalstk[k-1] <> ARGFLAG) then
        putback(evalstk[k])
      else begin
        argno := ord(evalstk[k]) - ord('0');
        if (argno >= 0) and (argno < j-i) then begin
          cscopy(evalstk, argstk[i+argno+1], temp);
          pbstr(temp)
        end;
        k := k - 1  { skip over $ }
      end;
      k := k - 1
    end;
    if (k = t) then   { do last character }
      putback(evalstk[k])
  end
end;


{ dodef -- install definition in table }
procedure dodef (var argstk : posbuf; i, j : integer);
var
  temp1, temp2 : string;
begin
  if (j - i > 2) then begin
    cscopy(evalstk, argstk[i+2], temp1);
    cscopy(evalstk, argstk[i+3], temp2);
    install(temp1, temp2, MACTYPE)
  end
end;


{ cscopy -- copy cb[i]... to string s }
procedure cscopy (var cb : charbuf; i : charpos;
    var s : string);
var
  j : integer;
begin
  j := 1;
  while (cb[i] <> ENDSTR) do begin
    s[j] := cb[i];
    i := i + 1;
    j := j + 1
  end;
  s[j] := ENDSTR
end;


{ doexpr -- evaluate arithmetic expressions }
procedure doexpr (var argstk : posbuf; i, j : integer);
var
  temp : string;
  junk : integer;
begin
  cscopy(evalstk, argstk[i+2], temp);
  junk := 1;
  pbnum(expr(temp, junk))
end;


{ pbnum -- convert number to string, push back on input }
procedure pbnum (n : integer);
var
  temp : string;
  junk : integer;
begin
  junk := itoc(n, temp, 1);
  pbstr(temp)
end;


{ expr -- recursive expression evaluation }
function expr (var s : string; var i : integer) : integer;
var
  v : integer;
  t : character;
begin
  v := term(s, i);
  t := gnbchar(s, i);
  while (t in [PLUS, MINUS]) do begin
    i := i + 1;
    if (t = PLUS) then
      v := v + term(s, i)
    else
      v := v - term(s, i);
    t := gnbchar(s, i)
  end;
  expr := v
end;


{ gnbchar -- get next non-blank character }
function gnbchar (var s : string; var i : integer)
    : character;
begin
  while (s[i] in [BLANK, TAB, NEWLINE]) do
    i := i + 1;
  gnbchar := s[i]
end;


{ term -- evaluate term of arithmetic expression }
function term (var s : string; var i : integer) : integer;
var
  v : integer;
  t : character;

begin
  v := factor(s, i);
  t := gnbchar(s, i);
  while (t in [STAR, SLASH, PERCENT]) do begin
    i := i + 1;
    case t of
    STAR:
      v := v * factor(s, i);
    SLASH:
      v := v div factor(s, i);
    PERCENT:
      v := v mod factor(s, i)
    end;
    t := gnbchar(s, i)
  end;
  term := v
end;


{ factor -- evaluate factor of arithmetic expression }
function factor (var s : string; var i : integer)
     : integer;
begin
  if (gnbchar(s, i) = LPAREN) then begin
    i := i + 1;
    factor := expr(s, i);
    if (gnbchar(s, i) = RPAREN) then
      i := i + 1
    else
      message('macro: missing paren in expr')
  end
  else
    factor := ctoi(s, i)
end;


{ dosub -- select substring }
procedure dosub (var argstk : posbuf; i, j : integer);
var
  ap, fc, k, nc : integer;
  temp1, temp2 : string;
begin
  if (j - i >= 3) then begin
    if (j - i < 4) then
      nc := MAXTOK
    else begin
      cscopy(evalstk, argstk[i+4], temp1);
      k := 1;
      nc := expr(temp1, k)
    end;
    cscopy(evalstk, argstk[i+3], temp1);  { origin }
    ap := argstk[i+2];  { target string }
    k := 1;
    fc := ap + expr(temp1, k) - 1;  { first char }
    cscopy(evalstk, ap, temp2);
    if (fc >= ap) and (fc < ap+length(temp2)) then begin
      cscopy(evalstk, fc, temp1);
      for k := fc+min(nc,length(temp1))-1 downto fc do
        putback(evalstk[k])
    end
  end
end;


{ doif -- select one of two arguments }
procedure doif (var argstk : posbuf; i, j : integer);
var
  temp1, temp2, temp3 : string;
begin
  if (j - i >= 4) then begin
    cscopy(evalstk, argstk[i+2], temp1);
    cscopy(evalstk, argstk[i+3], temp2);
    if (equal(temp1, temp2)) then
      cscopy(evalstk, argstk[i+4], temp3)
    else if (j - i >= 5) then
      cscopy(evalstk, argstk[i+5], temp3)
    else
      temp3[1] := ENDSTR;
    pbstr(temp3)
  end
end;


{ dolen -- return length of argument }
procedure dolen(var argstk : posbuf; i, j : integer);
var
  temp : string;
begin
  if (j - i > 1) then begin
    cscopy(evalstk, argstk[i+2], temp);
    pbnum(length(temp))
  end
  else
    pbnum(0)
end;



{ dochq -- change quote characters }
procedure dochq (var argstk : posbuf; i, j : integer);
var
  temp : string;
  n : integer;
begin
  cscopy(evalstk, argstk[i+2], temp);
  n := length(temp);
  if (n <= 0) then begin
    lquote := ord(GRAVE);
    rquote := ord(ACUTE)
  end
  else if (n = 1) then begin
    lquote := temp[1];
    rquote := lquote
  end
  else begin
    lquote := temp[1];
    rquote := temp[2]
  end
end;
