#
# sasm.py
# coded 2010-11-11 by Bill Ashmanskas, ashmansk@hep.upenn.edu
# simple assembler for instruction set of simple_cpu.v
#

import sys
import string

# This is the list of instructions that our processor understands, in
# order by opcode, i.e. LOAD is opcode 0, STORE is opcode 1, JUMP is
# opcode 2, etc.
_INSTRS = "load store jump jumpz jumpn add sub mul out".split()

# Number of memory words written out; unused words are padded with zeros
_MEM_WORDS = 256


def _parse_program(lines):
    """First pass: strip comments, record label addresses, collect words.

    'lines' is any iterable of source-text lines.  Returns a tuple
    (ptext, labeladdr):

      ptext     -- one list of words per instruction or '.data'
                   directive, with the word's memory address (two-digit
                   hexadecimal string) prepended
      labeladdr -- dictionary mapping each label name to the memory
                   address at which it was defined

    We need this separate pass because a label may be used before the
    line that defines it, so every label address must be known before
    any instruction is translated.
    """
    ptext = []
    labeladdr = {}
    # Keep track of current memory address; program starts at address zero
    addr = 0x00
    for line in lines:
        # Discard comments (everything from '#' onward), then chop up
        # what remains into words; split() also drops surrounding spaces
        words = line.split("#")[0].split()
        # Skip blank lines
        if not words:
            continue
        # If the first word is a label (ends in ':'), associate the
        # current address with the label name (minus the final ':') and
        # keep only the remaining words for further processing
        if words[0].endswith(":"):
            labeladdr[words[0][:-1]] = addr
            words = words[1:]
        # A line holding only a label occupies no memory: the label then
        # refers to whatever instruction comes next
        if words:
            ptext.append(["%02x" % addr] + words)
            addr += 1
    return ptext, labeladdr


def _encode(mnemonic, argument, labeladdr):
    """Return the memory word for one instruction or '.data' directive.

    'mnemonic' must already be lowercase.  'argument' is either the name
    of a label (translated via 'labeladdr') or a hexadecimal number.

    Raises ValueError if the argument is neither a known label nor valid
    hexadecimal, or if the mnemonic is not recognized.
    """
    if argument in labeladdr:
        harg = labeladdr[argument]
    else:
        harg = int(argument, 16)
    # For a .data directive, the memory word is literally the given
    # argument; for an instruction mnemonic (e.g. STORE), the memory
    # word is 256*opcode + argument, i.e. high byte is opcode, and low
    # byte is the low 8 bits of the argument
    if mnemonic == ".data":
        return harg
    if mnemonic in _INSTRS:
        return _INSTRS.index(mnemonic) * 0x0100 + (harg & 0xff)
    raise ValueError("unknown mnemonic " + mnemonic)


def main():
    """Assemble the file named by sys.argv[1] and print memory contents.

    Writes one "mem['hAA] = 'hDDDD;" line per memory word to standard
    output, annotated with the original label/mnemonic/argument text,
    then pads with zero words up to 256 words in total.

    Raises ValueError for an unknown mnemonic or an argument that is
    neither a label nor a hexadecimal number.
    """
    # Read line-by-line through the file whose name is given in the
    # first command-line argument; 'with' closes it once parsing is done
    with open(sys.argv[1]) as source:
        ptext, labeladdr = _parse_program(source)

    # Reverse-lookup dictionary mapping addresses back to labels; the
    # only purpose for this is so that we can output annotated code
    # that includes the label names
    addrlabel = dict((address, name) for name, address in labeladdr.items())

    # A string of spaces equal in length to "longestlabelname: ", so
    # that the lines with and without labels line up nicely when
    # printed.  A program without any labels has no longest name, so
    # fall back to zero instead of calling max() on an empty list.
    widths = [len(name) for name in labeladdr]
    nulllabel = " " * (2 + (max(widths) if widths else 0))

    # Second pass: translate each instruction or '.data' directive
    for p in ptext:
        # The first word is the address (in hexadecimal)
        addr = int(p[0], 16)
        # The second word is the mnemonic; convert it to lowercase so
        # that the assembler is case-insensitive
        mnemonic = p[1].lower()
        # The third word is the argument; default to "0" if none given
        argument = p[2] if len(p) > 2 else "0"
        instr = _encode(mnemonic, argument, labeladdr)

        # Find the label, if any, corresponding to this address, and pad
        # it with spaces so that the output lines up in columns
        label = nulllabel
        if addr in addrlabel:
            label = (addrlabel[addr] + ": " + nulllabel)[:len(nulllabel)]

        # Write out the calculated memory contents, along with the
        # human-readable version of the memory contents.  The output is
        # built as a single string so that the same text is produced
        # under both Python 2 and Python 3.
        print(" mem['h%02x] = 'h%04x; // %s %-5s" % (addr, instr, label, p[1])
              + " " + " ".join(p[2:]))

    # Pad the output with zeros to fill 256 memory words; starting from
    # the number of words emitted also covers an empty program
    for addr in range(len(ptext), _MEM_WORDS):
        print(" mem['h%02x] = 'h%04x;" % (addr, 0))


# Python idiom for running the main program
if __name__ == "__main__":
    main()