#
# sasm.py
# coded 2010-11-11 by Bill Ashmanskas, ashmansk@hep.upenn.edu
# simple assembler for instruction set of simple_cpu.v
#

import sys, string

def main():
    """Assemble the program named by sys.argv[1] into Verilog memory initializers.

    The source file is read line by line.  Everything after a '#' is a
    comment.  A first word ending in ':' defines a label for the current
    address.  The remaining words are an instruction mnemonic (or the
    '.data' directive) followed by an optional argument, which is either
    a label name or a hexadecimal number (default "0").

    One line of the form "mem['hAA] = 'hDDDD;" is printed to standard
    output for each program word, annotated with the original source text,
    and the remaining addresses up to 'hff are filled with zeros.

    The code uses only constructs that behave identically under
    Python 2 (2.6+) and Python 3.

    Raises:
        IndexError: if no file name was given on the command line.
        ValueError: if a mnemonic is unknown, or if an argument is neither
            a defined label nor a hexadecimal number.
    """
    # This is the list of instructions that our processor understands,
    # in order by opcode, i.e. LOAD is opcode 0, STORE is opcode 1,
    # JUMP is opcode 2, etc.
    instrs = "load store jump jumpz jumpn add sub mul out".split()
    # First pass: collect every line that produces a memory word and
    # assign addresses to labels.  Each entry of 'ptext' occupies exactly
    # one memory word, so the address of the next word is always
    # len(ptext); the program starts at address zero.
    ptext = []
    # Lookup table: labeladdr[label] stores the address of that label
    labeladdr = {}
    # 'with' guarantees that the file is closed even if an error occurs
    with open(sys.argv[1]) as src:
        for line in src:
            # Discard comments (#); chop up what is left into words
            words = line.split("#")[0].split()
            # Skip blank lines
            if not words:
                continue
            # If the first word is a label (ends in ':'), associate the
            # current address with the label name (minus the ':') and
            # keep only the remaining words
            if words[0].endswith(":"):
                labeladdr[words[0][:-1]] = len(ptext)
                words = words[1:]
            # A label may stand alone on a line, in which case there is
            # nothing to store at this address yet
            if words:
                ptext.append(words)
    # Reverse-lookup dictionary (address -> label); its only purpose is
    # to let us annotate the output with label names
    addrlabel = dict((a, name) for name, a in labeladdr.items())
    # A run of spaces equal in length to "longestlabelname: ", so that
    # lines with and without labels line up nicely when printed.  The
    # 'or [0]' keeps max() from failing on a program without any labels.
    nulllabel = " " * (2 + max([len(name) for name in labeladdr] or [0]))
    # Second pass: translate each instruction or '.data' directive
    for addr, words in enumerate(ptext):
        # Mnemonics are case-insensitive
        mnemonic = words[0].lower()
        # If no argument is given, use "0" as a default argument
        argument = words[1] if len(words) > 1 else "0"
        # If the argument is the name of a label, translate it into the
        # corresponding memory address; otherwise, interpret the
        # argument as a hexadecimal number
        if argument in labeladdr:
            harg = labeladdr[argument]
        else:
            try:
                harg = int(argument, 16)
            except ValueError:
                raise ValueError(
                    "bad argument %r at address %02x: neither a defined "
                    "label nor a hexadecimal number" % (argument, addr))
        # For a .data directive, the memory word is literally the given
        # argument; for an instruction mnemonic (e.g. STORE), the memory
        # word is 256*opcode + argument, i.e. high byte is opcode, and
        # low byte is the low 8 bits of the argument
        if mnemonic == ".data":
            instr = harg
        elif mnemonic in instrs:
            instr = instrs.index(mnemonic) * 0x0100 + (harg & 0xff)
        else:
            raise ValueError("unknown mnemonic " + mnemonic)
        # Find the label, if any, corresponding to this address, and pad
        # it with spaces so that the output lines up in columns
        label = nulllabel
        if addr in addrlabel:
            label = (addrlabel[addr] + ": ").ljust(len(nulllabel))
        # Write out the calculated memory contents, along with the
        # human-readable version of the memory contents.  The single
        # parenthesized argument makes this valid as both a Python 2
        # print statement and a Python 3 print() call.
        print("        mem['h%02x] = 'h%04x;  //  %s %-5s" % (
            addr, instr, label, words[0]) + " " + " ".join(words[1:]))
    # Pad the output with zeros to fill 256 memory words, starting right
    # after the last program word (at address 0 for an empty program)
    for addr in range(len(ptext), 256):
        print("        mem['h%02x] = 'h%04x;" % (addr, 0))

# Run the assembler only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()