
# rdMacro.py
# Andrew Davison, ad@coe.psu.ac.th, May 2026

"""
Recursive-descent macro processor.

Reads a macro source file whose name ends in ".m3" and
writes the expanded text with the extension ".txt".

Usage:
  python3 rdMacro.py define.m3
  python3 rdMacro.py -trace changeq.m3

Converted from ideas in
  "Software Tools in Pascal"
  Brian W. Kernighan, P. J. Plauger
  Addison-Wesley, 1981
  Chapter 8

Supported built-in macros
 * define(name, body)
    – define a macro; $1..$9 expand to arguments
 * expr(e)
    – evaluate an integer arithmetic expression (+,-,*,/,%);
      '/' is floor division
 * substr(s, from, n)
    – substring of s starting at 0-based index from, n chars
 * ifelse(a, b, t, f)
    – if a==b expand to t, else f
 * len(s)
    – length of s
 * changeq(lq, rq)
    – change quote characters (default ` and ')

Macro grammar (BNF-like):

  mtext      ::= { element }

  element    ::=  macro-call
               | ident
               | quoted
               | other-char

  macro-call ::= ident '(' args ')'
  ident      ::= (letter | '_') { letter | digit | '_' }
  args       ::=  [ mtext {',' mtext } ]

  quoted      ::= lquote qbody rquote
  qbody ::= { quoted | any-char }

  user-macro ::= text containing $0..$9 substitutions


Expression grammar:
  expr       ::= term { ('+' | '-') term }
  term       ::= factor { ('*' | '/' | '%') factor }
  factor     ::= number
               | '(' expr ')'
  number     ::= ['-'] digit { digit }

Notes

 * Macro arguments are recursively expanded.
 * Quotes suppress expansion until the quoted text is emitted.
 * Nested macro calls are parsed recursively.
 * User-defined macros substitute $1..$9 with arguments.
"""

import sys
import re
from collections import deque


# Symbol-table types: the 'kind' tag stored as the second item of
# every symTable entry. expandMacro() compares against these tags
# to pick the handler for a call.
DEFTYPE  = 'define'    # built-in define
MACTYPE  = 'macro'     # user macro stored by dodef()
IFTYPE   = 'if'        # built-in ifelse
SUBTYPE  = 'substr'    # built-in substr
EXPRTYPE = 'expr'      # built-in expr
LENTYPE  = 'len'       # built-in len
CHQTYPE  = 'changeq'   # built-in changeq

# Symbol table: name -> (defn:str, kind)
# Built-ins are entered by macro() with an empty defn;
# user macros are entered by dodef() with their body as defn.
symTable = {}

# Quote characters; reassigned at run time by dochq()
lquote = '`'
rquote = "'"

# When True, parseCall() and expandMacro() print TRACE lines.
# Set from the -trace command-line flag.
isTracing = False


def macro(src):
  ''' Expand every macro in src and return the expanded text.

    src is the macro source text. Its characters are loaded into
    a deque so the parser can read from the front and push
    characters back.

    This function is also called recursively (by parseCall() and
    by the built-in handlers), so the built-ins are installed with
    setdefault(): a built-in name that the user has redefined
    with define() is kept instead of being silently restored
    by the next nested expansion.
  '''
  # Install each built-in only if its name is not yet defined
  for name, kind in [('define', DEFTYPE),
          ('expr', EXPRTYPE), ('substr', SUBTYPE),
          ('ifelse', IFTYPE), ('len', LENTYPE),
          ('changeq', CHQTYPE)]:
    symTable.setdefault(name, ('', kind))

  return parseMText(deque(src))



def parseMText(inQ, stopChars=''):
  ''' Expand the text at the front of inQ; return it as a string.

      mtext   ::= { element }
      element ::= macro-call | ident | quoted | other-char

    Scanning stops when inQ is empty or when the next character
    is one of stopChars; that character is left unread in inQ.
  '''
  pieces = []
  ch = peekChar(inQ)
  while ch != '' and ch not in stopChars:
    if ch == '_' or ch.isalpha():
      # start of an identifier, which may be a macro call
      piece = parseCall(inQ)
    elif ch == lquote:
      piece = parseQuoted(inQ)
    else:
      # any other character is copied through unchanged
      piece = getChar(inQ)
    pieces.append(piece)
    ch = peekChar(inQ)
  return ''.join(pieces)



def parseCall(inQ):
  ''' Parse  macro-call | ident  from the front of inQ.

        macro-call ::= ident '(' args ')'

    Returns the text to emit:
     * a name that is not in symTable is returned unchanged;
     * a known name followed (after optional white space) by '('
       has its arguments collected by parseArgs() and is
       expanded by expandMacro();
     * a user macro used without '(' whose body contains no '$'
       is expanded with no arguments;
     * any other known name used without '(' is returned
       unchanged.

    White space skipped while looking for '(' is pushed back
    onto inQ when no '(' follows, so the caller emits it once.
  '''
  name = parseIdent(inQ)
  entry = symTable.get(name)
  if entry is None:
    return name

  if isTracing:
    print(f"TRACE: Potential macro '{name}'")

  # Look past any white space for the '(' that starts the args
  skippedSpaces = []
  while True:
    ch = getChar(inQ)
    if ch not in (' ', '\t', '\n'):
      break
    skippedSpaces.append(ch)

  # ch is first non-whitespace char (or '' at end of input)
  if ch == '(':
    args = parseArgs(inQ)
    return expandMacro(name, entry, args)

  # Not a call: restore the input exactly as it was after name.
  # ch goes back first so the spaces end up in front of it.
  if ch:
    inQ.appendleft(ch)
  inQ.extendleft(reversed(skippedSpaces))

  # check if name is a user macro that takes no arguments
  defn, kind = entry
  if kind == MACTYPE and '$' not in defn:
    return macro(defn)

  # The skipped white space is back on inQ and will be copied
  # to the output by the caller, so return the name alone;
  # returning the spaces here as well would emit them twice.
  return name


def parseIdent(inQ):
  ''' ident ::= (letter | '_') { letter | digit | '_' }

    Consume the longest run of letters, digits and '_' at the
    front of inQ and return it. The first character is not
    checked separately here: parseMText() only reaches this
    function (via parseCall()) when it is a letter or '_'.
  '''
  chars = []
  ch = peekChar(inQ)
  while ch == '_' or ch.isalnum():
    chars.append(getChar(inQ))
    ch = peekChar(inQ)
  return ''.join(chars)



def parseArgs(inQ):
  ''' args ::= [ mtext {',' mtext } ]

    Collect the arguments of a macro call whose opening '(' has
    already been consumed, reading up to and including the
    matching ')'. No expansion happens here; the caller passes
    the result to expandMacro().

    Returns a list of argument strings, each with surrounding
    white space stripped. Quoted sections are read by
    parseQuoted(), so commas and parentheses inside quotes do
    not split or end the list. Unquoted parentheses are counted
    so an inner '(' ... ')' pair does not end the list early.
    A call with nothing between the parentheses yields [''].

    Raises RuntimeError if the input ends before the closing ')'.
  '''
  collected = []
  current = []     # characters of the argument being built
  openParens = 1   # the call's own '(' is already open

  while True:
    nxt = peekChar(inQ)
    if nxt == '':
      raise RuntimeError('macro: missing closing ) in args')

    if nxt == lquote:
      current.append(parseQuoted(inQ))
      continue

    getChar(inQ)   # consume the character we peeked at
    if nxt == ',' and openParens == 1:
      # top-level comma: finish this argument, start the next
      collected.append(''.join(current).strip())
      current = []
      continue

    if nxt == '(':
      openParens += 1
    elif nxt == ')':
      openParens -= 1
      if openParens == 0:
        break      # the ')' that closes this call; not kept
    current.append(nxt)

  collected.append(''.join(current).strip())
  return collected


def parseQuoted(inQ):
  '''  quoted ::= lquote qbody rquote
        qbody ::= { quoted | any-char }

    Read one quoted section from the front of inQ and return
    its body with the outermost pair of quotes removed. Quotes
    nested inside the body are kept.

    Raises RuntimeError if the input ends before the quotes
    balance.
  '''
  getChar(inQ)   # drop the opening quote
  body = []
  level = 1      # number of quotes currently open
  while True:
    ch = getChar(inQ)
    if ch == '':
      raise RuntimeError('macro: missing right quote')
    if ch == rquote:
      level -= 1
      if level == 0:
        # the outermost closing quote is not part of the body
        return ''.join(body)
    elif ch == lquote:
      level += 1
    body.append(ch)



def expandMacro(name, entry, args):
  ''' Run the handler for one macro call and return its text.

    name  – the macro's name
    entry – its symTable value, a (defn, kind) pair
    args  – the argument strings collected by parseArgs()

    Built-ins receive only args; a user macro also needs its
    definition and name. An unrecognised kind yields ''.
  '''
  defn, kind = entry
  if isTracing:
    print(f"TRACE: Eval '{name}' args: {args}")

  if kind == MACTYPE:
    return doUserMacro(defn, name, args)

  # Built-ins only need the user arguments
  handlers = {
    DEFTYPE: dodef,
    EXPRTYPE: doexpr,
    SUBTYPE: dosub,
    IFTYPE: doif,
    LENTYPE: dolen,
    CHQTYPE: dochq,
  }
  handler = handlers.get(kind)
  if handler is None:
    return ''
  return handler(args)



# Built-in macro handlers ----------------------

def dodef(args):
  ''' define(name, body) – store a user macro in symTable.

    The first argument, stripped of white space, is the name.
    Any further arguments are joined back together with ','
    to form the body, so a body containing top-level commas
    is kept. Always expands to ''.
  '''
  if not args:
    return ''
  rawName, *bodyParts = args
  symTable[rawName.strip()] = (",".join(bodyParts), MACTYPE)
  return ''


def doexpr(args):
  ''' expr(e) – evaluate an arithmetic expression.

    The argument is macro-expanded first so macros inside the
    expression work, then evaluated by evalExpr(). Returns the
    result as a string, or "0" when there is no argument, the
    expression is malformed, or it divides by zero.

    Only expression errors are turned into "0". An error raised
    while expanding the argument (for example the RuntimeError
    for a missing quote or parenthesis) propagates, as it does
    in the other built-ins.
  '''
  if not args:
    return "0"

  # Expand outside the try so macro errors are not hidden
  exprText = macro(args[0])
  try:
    return str(evalExpr(exprText))
  except (ValueError, ZeroDivisionError):
    # malformed expression or division/modulo by zero
    return "0"


def dosub(args):
  ''' substr(s, from, n) – return a substring of s.

    s is macro-expanded; from and n are macro-expanded and then
    evaluated as expressions. from is a 0-based index and
    defaults to 0; n is a character count and defaults to the
    length of s. A range running past the end of s is cut off
    at the end.

    A negative from or a non-positive n yields ''. Without that
    check the slice would use Python's end-relative indexing
    and return text from the wrong part of s.

    Raises ValueError (from evalExpr()) if from or n is not a
    valid expression.
  '''
  if not args:
    return ''

  s = macro(args[0])
  frm = evalExpr(macro(args[1])) if len(args) > 1 else 0
  nc = evalExpr(macro(args[2])) if len(args) > 2 else len(s)

  if frm < 0 or nc <= 0:
    return ''
  return s[frm: frm + nc]


def doif(args):
  ''' ifelse(v1, v2, then, else) – conditional expansion.

    v1 and v2 are macro-expanded and compared as strings. If
    they are equal the call expands to 'then', otherwise to
    'else' (or '' when no fourth argument was given). Fewer
    than three arguments yields ''.

    The selected branch is macro-expanded before it is returned,
    because the caller emits the returned text without
    rescanning it. The branch that is not selected is never
    expanded, so a recursive macro can use ifelse to stop.
  '''
  if len(args) < 3:
    return ''

  # expand v1 and v2 before comparing
  v1 = macro(args[0].strip())
  v2 = macro(args[1].strip())
  if v1 == v2:
    return macro(args[2])
  if len(args) > 3:
    return macro(args[3])
  return ''


def dolen(args):
  ''' len(s) – return the length of the macro-expanded first
    argument as a string, or '0' when there is no argument.
  '''
  if not args:
    return '0'
  expanded = macro(args[0])
  return str(len(expanded))


def dochq(args):
  ''' changeq(lq, rq) – change the quote characters.

    Two or more arguments: the first two, stripped of white
    space, become the left and right quotes; an empty one falls
    back to its default.
    One argument: its first character becomes the left quote
    and its second the right quote; a single character is used
    for both; an empty argument restores the defaults.
    No arguments: restore the defaults (` and ').

    Always expands to ''.
  '''
  global lquote, rquote
  defaultLeft, defaultRight = '`', "'"

  if len(args) >= 2:
    # changeq(l, r)
    left = args[0].strip()
    right = args[1].strip()
    lquote = left or defaultLeft
    rquote = right or defaultRight
    return ''

  # changeq(lr), or no argument list at all
  pair = args[0].strip() if args else ''
  if not pair:
    lquote, rquote = defaultLeft, defaultRight
  elif len(pair) == 1:
    lquote = rquote = pair[0]
  else:
    lquote, rquote = pair[0], pair[1]
  return ''


def doUserMacro(defn, name, args):
  """ Expand a user-defined macro.
      Each $digit in the definition is replaced:
      $0 by the macro name, $1..$9 by the corresponding
      argument, or by nothing when that argument was not
      supplied. The substituted text is then macro-expanded.
  """
  # values[k] is the replacement text for $k
  values = [name] + list(args)

  def substitute(match):
    k = int(match.group(1))
    return values[k] if k < len(values) else ""

  return macro(re.sub(r'\$(\d)', substitute, defn))



# ----------- expression evaluator ---------------
''' expr := term { (+|-) term }
    term := factor { (*|/|%) factor }
    factor := number
           |  ( expr )
'''

def evalExpr(s):
  ''' Evaluate the expression at the start of string s and
    return its integer value. The position where parsing
    stopped is discarded.
  '''
  value, _ = parseExpr(s, 0)
  return value


def parseExpr(s, i):
  ''' expr := term { (+|-) term }

    Parse from index i of s. Returns (value, index), where
    index is the position of the first character not used.
  '''
  total, pos = parseTerm(s, i)
  while True:
    pos = skipWhites(s, pos)
    op = s[pos] if pos < len(s) else ''
    if op == '+':
      rhs, pos = parseTerm(s, pos + 1)
      total += rhs
    elif op == '-':
      rhs, pos = parseTerm(s, pos + 1)
      total -= rhs
    else:
      return total, pos


def parseTerm(s, i):
  ''' term := factor { (*|/|%) factor }

    Parse from index i of s. Returns (value, index), where
    index is the position of the first character not used.
    '/' is floor division (//) and '%' is Python's modulo.
  '''
  product, pos = parseFactor(s, i)
  while True:
    pos = skipWhites(s, pos)
    op = s[pos] if pos < len(s) else ''
    if op not in ('*', '/', '%'):
      return product, pos
    operand, pos = parseFactor(s, pos + 1)
    if op == '*':
      product *= operand
    elif op == '/':
      product //= operand
    else:
      product %= operand


def parseFactor(s, i):
  ''' factor := number  |  ( expr )
      number := ['-'] digit { digit }

    Parse from index i of s, skipping leading white space.
    Returns (value, index), where index is the position just
    after the factor.

    Raises ValueError if a parenthesised expression is not
    closed or if no digits are found where a number is needed.
  '''
  pos = skipWhites(s, i)
  size = len(s)

  if pos < size and s[pos] == '(':
    inner, pos = parseExpr(s, pos + 1)
    pos = skipWhites(s, pos)
    if pos < size and s[pos] == ')':
      return inner, pos + 1
    raise ValueError('macro: missing ) in expr')

  # optional leading minus sign
  negative = pos < size and s[pos] == '-'
  if negative:
    pos += 1

  end = pos
  while end < size and s[end].isdigit():
    end += 1
  if end == pos:
    raise ValueError('macro: expected number in expr')

  magnitude = int(s[pos:end])
  return (-magnitude if negative else magnitude), end


def skipWhites(s, i):
  ''' Return the index of the first character of s at or after
    i that is not a space, tab or newline. If there is none,
    return len(s), or i itself when i is already past the end.
  '''
  for pos in range(i, len(s)):
    if s[pos] not in ' \t\n':
      return pos
  return max(i, len(s))


# Queue / I/O helpers ------------------

def peekChar(inQ):
  ''' Return the first item of inQ without removing it,
    or '' when inQ is empty.
  '''
  if not inQ:
    return ''
  return inQ[0]


def getChar(inQ):
  ''' Remove and return the first item of inQ,
    or return '' when inQ is empty.
  '''
  if not inQ:
    return ''
  return inQ.popleft()



# main() ----------

def main():
  ''' Command-line entry point.

    Usage: python3 rdMacro.py [-trace] <file.m3>

    Reads the .m3 file, expands it with macro(), prints the
    expansion and writes it to a file with the same name but a
    .txt extension. Exits with status 1 when the arguments are
    wrong, the input cannot be read, or the expansion fails.
  '''
  global isTracing

  if len(sys.argv) < 2:
    print('Usage: python3 rdMacro.py [-trace] <file.m3>')
    sys.exit(1)

  # Check for trace flag
  args = sys.argv[1:]

  if '-trace' in args:
    isTracing = True
    args.remove('-trace')

  if not args or not args[0].endswith('.m3'):
    print('Error: input file must have a .m3 extension')
    sys.exit(1)

  infile = args[0]
  try:
    with open(infile, 'r') as fh:
      src = fh.read()
  except OSError:
    # covers a missing file as well as permission problems
    # or a directory given in place of a file
    print(f'Error: cannot open {infile!r}')
    sys.exit(1)

  try:
    expandedSrc = macro(src)
  except (RuntimeError, ValueError, ZeroDivisionError) as err:
    # RuntimeError: unbalanced quotes or parentheses in the
    # input (the parser raises it); it also covers runaway
    # recursion. ValueError and ZeroDivisionError come from
    # the expression evaluator.
    print(f'Error: {err}')
    sys.exit(1)

  print("\n--------- Expansion ---------\n")
  print(expandedSrc)
  print("-----------------------------\n")

  # infile ends in '.m3', so dropping three characters
  # removes the extension
  outfile = infile[:-3] + '.txt'
  with open(outfile, 'w') as fh:
    fh.write(expandedSrc)
  print(f'Expanded output written to {outfile!r}')


# Run only when executed as a script, so that importing this
# module does not read sys.argv or exit the interpreter
if __name__ == '__main__':
  main()
