/*
 * simple_cpu.v
 *
 * adapted Nov 2010 by Bill Ashmanskas (ashmansk@hep.upenn.edu)
 * from 'DE2_TOP.v' in
 *   http://courses.cit.cornell.edu/ece576/DE2/TinyCPU/HamblenCh9.zip
 * which itself was adapted (by Bruce Land at Cornell, July 2008) from
 * 'uP1' and 'uP3' simple computer examples, which appear (in VHDL) in
 * Hamblen's book, "Rapid prototyping of digital systems."
 *
 * This is the simplest (to explain) example I could find/adapt of a
 * microprocessor (in synthesizable Verilog) capable of doing non-trivial
 * computation.  I demonstrate displaying primes from 2 to 9973.
 *
 * I took care to use only Verilog constructs that are easy for a beginner
 * to understand.  Thus, always blocks are used only for synchronous logic,
 * and are encapsulated inside RAM and D-type flip-flop definitions.  The
 * FSM combinational logic is implemented using only continuous assignments,
 * instead of the case statements more commonly used in FSM logic.  (A case
 * statement would require a combinational always block, which would require
 * a discussion of sensitivity lists, blocking vs. non-blocking assignments,
 * inferred latches, race conditions, and other needless distractions.)
 *
 * The Verilog constructs that are used in this example are illustrated
 * in my notes at http://positron.hep.upenn.edu/p364/verilog_notes.html .
 */

`default_nettype none

/*
 * simple_cpu: accumulator-based CPU with a 256-word x 16-bit internal RAM.
 *
 * Each instruction is one 16-bit word: the high byte (IR[15:8]) is the
 * opcode and the low byte (IR[7:0]) is a memory address / jump target.
 * Every instruction takes FETCH, DECODE and one execute state (STORE
 * takes two).  Unrecognized opcodes fall straight back to FETCH.
 *
 * While 'run' is 0 the CPU is paused: the FSM state is held and no
 * register or memory write takes place (except the clears performed in
 * the RESET state), so execution resumes exactly where it stopped.
 */
module simple_cpu (
    input  wire        clk,      // CPU clock
    input  wire        reset,    // forces FSM to RESET (clears PC and acc)
    input  wire        run,      // 1=run, 0=pause
    output wire [15:0] out,      // register for CPU "output" to world
    output wire [7:0]  PC,       // program counter
    output wire [15:0] acc,      // accumulator (CPU register)
    output wire [7:0]  memaddr,  // memory address
    output wire [15:0] mem_q,    // memory output data
    output wire [15:0] IR,       // instruction register
    output wire [3:0]  fsm       // CPU state
);

    wire [15:0] acc_d;   // to go into accumulator next cycle
    wire [15:0] RAM_q;   // data read out of RAM this cycle
    wire [7:0]  PC_d;    // to go into Pgm Counter next cycle
    wire [3:0]  fsm_d;   // to go into FSM state FF next cycle

    // write-enable lines for FFs and RAM
    wire acc_we, out_we, IR_we, PC_we, mem_we;

    /*
     * D-type flip flop to hold state number of CPU's Finite State Machine.
     * The state advances only while running, or when reset is asserted
     * (so that a paused CPU can still be reset).
     */
    dffe_Nbit #(4) fsm_ff (.clk(clk), .d(fsm_d), .q(fsm), .ena(run||reset));

    parameter  // FSM state numbering
        FSM_RESET       = 0,
        FSM_FETCH       = 1,
        FSM_DECODE      = 2,
        FSM_EXEC_LOAD   = 3,
        FSM_EXEC_STORE  = 4,
        FSM_EXEC_STORE2 = 5,
        FSM_EXEC_JUMP   = 6,
        FSM_EXEC_JUMPZ  = 7,
        FSM_EXEC_JUMPN  = 8,
        FSM_EXEC_ADD    = 9,
        FSM_EXEC_SUB    = 10,
        FSM_EXEC_MUL    = 11,
        FSM_EXEC_OUT    = 12;

    /*
     * Next-state logic:
     *   reset line => RESET
     *   FETCH      => DECODE
     *   DECODE     => execute decoded instruction (LOAD, STORE, JUMP, etc.)
     *   STORE      => STORE2 (added cycle for memory write to complete)
     *   any other  => FETCH
     */
    assign fsm_d =
        reset ? FSM_RESET :
        fsm==FSM_FETCH ? FSM_DECODE :
        fsm==FSM_DECODE ? (IR[15:8]==0 ? FSM_EXEC_LOAD  :
                           IR[15:8]==1 ? FSM_EXEC_STORE :
                           IR[15:8]==2 ? FSM_EXEC_JUMP  :
                           IR[15:8]==3 ? FSM_EXEC_JUMPZ :
                           IR[15:8]==4 ? FSM_EXEC_JUMPN :
                           IR[15:8]==5 ? FSM_EXEC_ADD   :
                           IR[15:8]==6 ? FSM_EXEC_SUB   :
                           IR[15:8]==7 ? FSM_EXEC_MUL   :
                           IR[15:8]==8 ? FSM_EXEC_OUT   :
                                         FSM_FETCH) :    // unknown opcode
        fsm==FSM_EXEC_STORE ? FSM_EXEC_STORE2 :
        FSM_FETCH;

    /*
     * This CPU uses an internal RAM consisting of 256 16-bit words.
     * For simplicity, this RAM has separate D and Q lines for data
     * written to vs. read from the RAM.
     *
     * The address is the program counter while fetching, and the low
     * byte of the instruction otherwise.  The only data ever written
     * is the accumulator (STORE instruction).
     */
    ram256x16 ram (.clk(clk), .we(mem_we), .A(memaddr), .D(acc), .Q(mem_q));
    assign memaddr = (fsm==FSM_FETCH) ? PC : IR[7:0];
    // gated with 'run' so that nothing is written while the CPU is paused
    assign mem_we  = run && (fsm==FSM_EXEC_STORE);

    /*
     * Accumulator is this CPU's primary register; all math
     * instructions operate on the accumulator.
     *
     * Accumulator next-value logic:
     *   ADD   => acc := acc + memory
     *   SUB   => acc := acc - memory
     *   MUL   => acc := acc * memory
     *   LOAD  => acc := memory
     *   RESET => acc := 0
     *
     * Note that the multiply happens in a single clock cycle, so it
     * should synthesize to an entirely combinational multiplier -- the
     * one you would write down using an adder and a multiplexer for each
     * bit of the multiplicand.  Results are truncated to the 16-bit
     * width of the accumulator.
     */
    dffe_Nbit #(16) acc_ff (.clk(clk), .d(acc_d), .q(acc), .ena(acc_we));
    assign acc_d =
        (fsm==FSM_EXEC_ADD)  ? acc + mem_q :
        (fsm==FSM_EXEC_SUB)  ? acc - mem_q :
        (fsm==FSM_EXEC_MUL)  ? acc * mem_q :
        (fsm==FSM_EXEC_LOAD) ? mem_q :
        0;
    // The RESET clear is unconditional; the arithmetic/load updates are
    // gated with 'run'.  Without the gate, pausing the CPU in e.g. the
    // ADD state would add memory into the accumulator on every clock.
    assign acc_we =
        (fsm==FSM_RESET) ||
        (run && (fsm==FSM_EXEC_ADD ||
                 fsm==FSM_EXEC_SUB ||
                 fsm==FSM_EXEC_MUL ||
                 fsm==FSM_EXEC_LOAD));

    /*
     * Output register is CPU's way to report result to outside world.
     *
     * The only path into the 'out' register is from the accumulator;
     * it is only write-enabled when executing the OUT instruction.
     */
    dffe_Nbit #(16) out_ff (.clk(clk), .d(acc), .q(out), .ena(out_we));
    assign out_we = run && (fsm==FSM_EXEC_OUT);

    /*
     * Instruction Register holds instruction currently being executed.
     *
     * The only path into the IR is from the memory; it is only
     * write-enabled in the FETCH state, i.e. while fetching the next
     * instruction from memory.
     */
    dffe_Nbit #(16) IR_ff (.clk(clk), .d(mem_q), .q(IR), .ena(IR_we));
    assign IR_we = run && (fsm==FSM_FETCH);

    /*
     * Program Counter holds address from which next instruction is fetched.
     *
     * Program Counter update logic:
     *   RESET => PC := 0
     *   FETCH => PC := PC+1 (after fetching from PC, point to PC+1)
     *   JUMP  => PC := low byte of IR
     *   JUMPZ => PC := low byte of IR if acc == 0, else unchanged
     *   JUMPN => PC := low byte of IR if acc < 0, else unchanged
     *            (acc < 0 means sign bit acc[15] is set)
     */
    dffe_Nbit #(8) PC_ff (.clk(clk), .d(PC_d), .q(PC), .ena(PC_we));
    assign PC_d =
        (fsm==FSM_RESET) ? 0 :
        (fsm==FSM_FETCH) ? PC+1 :
        IR[7:0];
    // The RESET clear is unconditional; all other updates are gated with
    // 'run'.  Without the gate, pausing the CPU in the FETCH state would
    // increment PC on every clock and skip instructions on resume.
    assign PC_we =
        (fsm==FSM_RESET) ||
        (run && ((fsm==FSM_FETCH) ||
                 (fsm==FSM_EXEC_JUMP) ||
                 (fsm==FSM_EXEC_JUMPZ && acc==0) ||
                 (fsm==FSM_EXEC_JUMPN && acc[15])));

endmodule


/*
 * Random Access Memory containing 256 words, each 16 bits wide;
 * for this RAM, writes are synchronous to clk, but reads are
 * asynchronous (i.e. Q changes immediately when A changes)
 */
module ram256x16 (
    input  wire        clk,  // clock (pertinent for writes only)
    input  wire        we,   // write-enable
    input  wire [7:0]  A,    // address at which to read/write
    input  wire [15:0] D,    // data to store next clk (if write-enabled)
    output wire [15:0] Q     // current memory contents at address A
);

    reg [15:0] mem [255:0];

    always @ (posedge clk) begin
        if (we) mem[A] <= D;
    end

    assign Q = mem[A];

    // initialize memory contents (works for simulation and synthesis!)
    integer i;
    initial begin
        // zero every word first, so that locations not set by the
        // included program have a defined value
        for (i=0; i<256; i=i+1)
            mem[i] = 0;
        // assembled program code is loaded from asm.v
        `include "asm.v"
    end

endmodule


/*
 * N-bit-wide D-type flip flop, with write-enable.
 * Contents are initialized to 0 and change only on a rising clock
 * edge at which 'ena' is high.
 */
module dffe_Nbit #(parameter N=1) (
    input  wire         clk,  // clock
    input  wire         ena,  // write-enable
    input  wire [N-1:0] d,    // data to store in FF (if enabled) next clk
    output wire [N-1:0] q     // current FF contents
);

    reg [N-1:0] qreg=0;

    always @ (posedge clk) begin
        if (ena) qreg <= d;
    end

    assign q = qreg;

endmodule


/*
 * Test bench: runs the CPU continuously (run tied high), pulses reset
 * once near the start, and prints one line of CPU state per clock.
 */
module simple_cpu_tb;

    // clock: first toggle at t=100, then toggles every 50 time units
    reg clk = 0;
    initial #50 forever #50 clk = ~clk;

    // reset pulse: asserted from t=100 until t=600
    reg rst = 0;
    initial begin
        #100 rst = 1;
        #500 rst = 0;
    end

    integer t = 0;
    wire [15:0] memq, IR, A, out;
    wire [7:0]  PC, memaddr;
    wire [3:0]  fsm;
    wire [7:0]  inst = IR[15:8];  // opcode field, for display only

    // 'run' is tied to a sized 1-bit constant to match the port width
    simple_cpu cpu (.clk(clk), .reset(rst), .run(1'b1),
                    .PC(PC), .acc(A), .IR(IR), .fsm(fsm),
                    .mem_q(memq), .memaddr(memaddr), .out(out));

    always @ (posedge clk) begin
        t = $time;
        #10;  // let the outputs settle after the clock edge before printing
        $display(
            "t=%1d fsm=%-8s PC=%x memaddr=%x memq=%x IR=%x %4s acc=%x out=%x",
            t,
            fsm==0  ? "RESET"    :
            fsm==1  ? "FETCH"    :
            fsm==2  ? "DECODE"   :
            fsm==3  ? "x_LOAD"   :
            fsm==4  ? "x_STORE"  :
            fsm==5  ? "x_STORE2" :
            fsm==6  ? "x_JUMP"   :
            fsm==7  ? "x_JUMPZ"  :
            fsm==8  ? "x_JUMPN"  :
            fsm==9  ? "x_ADD"    :
            fsm==10 ? "x_SUB"    :
            fsm==11 ? "x_MUL"    :
            fsm==12 ? "x_OUT"    : "???" ,
            PC, memaddr, memq, IR,
            inst==0 ? "LOAD" :
            inst==1 ? "STOR" :
            inst==2 ? "JUMP" :
            inst==3 ? "JMPZ" :
            inst==4 ? "JMPN" :
            inst==5 ? "ADD " :
            inst==6 ? "SUB " :
            inst==7 ? "MUL " :
            inst==8 ? "OUT " : "????",
            A, out);
    end

    // stop the simulation after a fixed amount of simulated time
    initial #5000000 $finish;

endmodule

// restore the default so that `default_nettype none does not leak into
// source files compiled after this one
`default_nettype wire