APPENDIX // pe.v // Verilog-XL behavioral models of a reconfigurable-processing element // to be used as a submodule of a controlling system // for Dr. W. B. Ligon, E&CE Dept., Clemson U., 1992-4 // by Ken Winiecki module pe0 (p_clock, p_reset, p_instr, p_flags, n_in, s_in, e_in, w_in, r_in, n_out, s_out, e_out, w_out, r_out, m_clock, m_write, m_addr, m_in, m_out, dump_reg, dump_mem); parameter // defaults, should be overridden by instantiating module: ADDR_WIDTH = 10, // width of PE memory address, in bits WORD_WIDTH = 32, // width of PE word, in bits MEM_LENGTH = 1024, // length of PE memory, in words PE_NAME = "undefined"; // name of PE for dump identification input // (1 bit wide) p_clock, p_reset, // PE clock & reset, rising-edge triggered e_in, w_in, // east (lsb) & west (msb) communication m_clock, // PE memory clock for external access, rising-edge m_write, // PE memory read/write control for external access: // read = 0, write = 1 dump_reg, dump_mem; // PE register & memory dump clocks, rising-edge input [ADDR_WIDTH-1:0] m_addr; // PE memory address for external access input [WORD_WIDTH-1:0] n_in, s_in, r_in, // PE north & south & router (word) communication m_in; // PE memory data for external access input [ADDR_WIDTH+32-1:0] p_instr; // PE instruction (described below) output // (1 bit wide) e_out, w_out; // east (lsb) & west (msb) communication output [9:0] p_flags; // PE flags (described below) output [WORD_WIDTH-1:0] n_out, s_out, r_out, // PE north & south & router (word) communication m_out; // PE memory data for external access reg temp_dis; // (1 bit wide) reg [9:0] p_flags; reg [7:0] out_res, reg_res; reg [WORD_WIDTH-1:0] mem[0:MEM_LENGTH-1], regs[0:15], in_1, in_2, out, m_out, last_out, r_in2, r; reg [WORD_WIDTH:0] temp; wire e_out, w_out; wire [WORD_WIDTH-1:0] east, west; integer i, j, reg_dump_ct, mem_dump_ct, clock_ct; // PE instruction field definitions and content parameters: `define mem_addr p_instr[ADDR_WIDTH+32-1:32] `define mem_ctrl 
p_instr[31:30] parameter NO_OP = 2'b00, // no memory operation R_OUT = 2'b01, // read to memory at address from OUT register W_IN1 = 2'b10, // write from memory at address to IN1 register W_IN2 = 2'b11; // write from memory at address to IN2 register `define dest_reg p_instr[29:26] `define srce_reg p_instr[25:22] parameter R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, DIS = 4'b1010, ROUT = 4'b1011, NORTH = 4'b1100, SOUTH = 4'b1101, EAST = 4'b1110, WEST = 4'b1111; `define outres p_instr[21:14] `define regres p_instr[13:6] `define carry p_instr[5:4] // Note: The PE only does EITHER a carry operation OR an ALU operation in // one instruction cycle. A carry word is computed using registers IN1 // and IN2 and the specified carry-in, and is made available at the input // from the router, r_in. The carry-out is placed in the carry flag of the // p_flags register. parameter C_DIS = 2'b00, // do not compute carry (do an ALU operation) C_CAR = 2'b01, // compute carry using carry flag for carry-in C_ZER = 2'b10, // compute carry using 0 for carry-in C_ONE = 2'b11; // compute carry using 1 for carry-in `define alu_dis p_instr[3] // allow disabling of ALU operation, 1=allow `define alu_dis_s p_instr[2] // sense of PE disable bit for ALU op, 1=invert `define mem_dis p_instr[1] // allow disabling of memory operation.... `define mem_dis_s p_instr[0] // sense of PE disable bit for memory op.... // PE flag definitions and bit positions: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. `define reg_msb_f p_flags[9] // m.s.b. of destination register `define reg_zer_f p_flags[8] // if destination register = 0 `define out_msb_f p_flags[7] // m.s.b. of register OUT `define out_zer_f p_flags[6] // if register OUT = 0 `define in2_msb_f p_flags[5] // m.s.b. 
of register IN2 `define in2_zer_f p_flags[4] // if register IN2 = 0 `define in1_msb_f p_flags[3] // m.s.b. of register IN1 `define in1_zer_f p_flags[2] // if register IN1 = 0 `define disable_f p_flags[1] // state of PE disable bit `define carry_f p_flags[0] // result of last carry operation assign n_out = regs[NORTH], s_out = regs[SOUTH], east = regs[EAST], // cannot access structure of regs[i]!!! e_out = east[WORD_WIDTH-1], west = regs[WEST], // cannot access structure of regs[i]!!! w_out = west[0], r_out = regs[ROUT]; initial begin reg_dump_ct = 0; mem_dump_ct = 0; clock_ct = 0; end always @ (posedge p_clock) begin // instruction cycle clock_ct = clock_ct + 1; last_out = out; // save OUT reg for mem op temp_dis = regs[DIS] || 0; // save disable reg for mem op // if ALU/carry operations not disabled, then perform one if ( ~(`alu_dis & (`alu_dis_s ^ temp_dis)) ) begin // carry setup or register read case (`carry) C_DIS: begin case (`srce_reg) ROUT: temp = r_in2; // should be temp = r_in (default) (port from router), but // router doesn't exist, but router supposed to produce // carry word, so PE will produce carry word and place in // register r_in2, thus r_in doesn't really work EAST: begin temp = regs[EAST] << 1; temp[0] = e_in; end WEST: begin temp = regs[WEST] >> 1; temp[WORD_WIDTH-1] = w_in; end default temp = regs[`srce_reg]; endcase out_res = `outres; // cannot access structure of `outres!!! reg_res = `regres; // cannot access structure of `regres!!! end C_CAR: temp = `carry_f; C_ZER: temp = 0; C_ONE: temp = 1; endcase // carry or ALU computation if (`carry) for (i=0; i> 1; temp[WORD_WIDTH-1] = w_in; end RR: temp = r_in2; // should be temp = r_in (port from router), but router // doesn't exist, but router supposed to produce carry // word, so PE will produce carry word and place in // register r_in2, thus r_in doesn't really work default temp = regs[`srce_reg]; endcase out_res = `outres; // cannot access structure of `outres!!! 
reg_res = `regres; // cannot access structure of `regres!!! end CC: temp = `carry_f; C0: temp = 0; C1: temp = 1; endcase // operation execution if (`carry) // carry operation for (i=0; i> `shift % WORD_WIDTH; if (w_in) for (i=0; i<`shift%WORD_WIDTH; i=i+1) temp[WORD_WIDTH-1-i] = 1; // since Verilog doesn't allow variable part-select!!! end RR: temp = r_in2; // should be temp = r_in (port from router), but router // doesn't exist, but router supposed to produce carry // word, so PE will produce carry word and place in // register r_in2, thus r_in doesn't really work default temp = regs[`srce_reg]; endcase out_res = `outres; // since cannot access bits of `outres!!! reg_res = `regres; // since cannot access bits of `regres!!! end CC: temp = `carry_f; C0: temp = 0; C1: temp = 1; endcase // operation execution if (`carry) // carry operation for (i=0; i> `shift % WORD_WIDTH; if (w_in) for (i=0; i<`shift%WORD_WIDTH; i=i+1) temp[WORD_WIDTH-1-i] = 1; // since Verilog doesn't allow variable part-select!!! end // note: should have... // RR: temp = r_in; // ...but router does not exist, but router supposed to pass // carry word through, so will let temp = regs[RR], thus r_in // will not work default temp = regs[`srce_reg]; endcase out_res = `outres; // since cannot access bits of `outres!!! reg_res = `regres; // since cannot access bits of `regres!!! end // operation execution if (`car_in1 || `car_in2) // carry operation for (i=0; i> `shift % WORD_WIDTH; // zeros shifted in if ( `ew_srce == EW1 || `ew_srce == EWIN && w_in || `ew_srce == EWOU && w_out ) // if source is a 1, for (i=0; i<`shift%WORD_WIDTH; i=i+1) // then "shift" it in alu_wrk[WORD_WIDTH-1-i] = 1; // use loop since Verilog doesn't allow variable part-select!!! end default: alu_wrk = regs[`srce_reg]; endcase if (`car_srce) car_wrk = `car_val; else car_wrk = `carry_f; reg_op = `regop; // needed since cannot access bits of `regop!!! out_op = `outop; // needed since cannot access bits of `outop!!! 
// Execute operation and store results if (`car_in1 || `car_in2) // carry operation begin for (i=0; i in_2 // 1: mem[1] -> in_2 instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS }; instr_1 = { `M 1, MEM, F, T, F, NALU, CN, NDIS }; clockem; // both: if in_2 negative, then out = ~in_2, in_2 = out, compute carry for // in_2 + 1; otherwise in_2 positive, so compute carry for in_2 + 0 if (flags_0[IN2_MSB_F]) condition_0 = 1; else condition_0 = 0; if (flags_1[IN2_MSB_F]) condition_1 = 1; else condition_1 = 0; if (condition_0^condition_1) negative = 1; else negative = 0; if (condition_0) instr_0 = { NMEM, R0, SRC, R0, NIN2, CN, NDIS }; else instr_0 = { NOOP }; if (condition_1) instr_1 = { NMEM, R0, SRC, R0, NIN2, CN, NDIS }; else instr_1 = { NOOP }; clockem; if (condition_0) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; else instr_0 = { NOOP }; if (condition_1) instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; else instr_1 = { NOOP }; clockem; if (condition_0) instr_0 = { NMEM, NALU, C1, NDIS }; else instr_0 = { NMEM, NALU, C0, NDIS }; if (condition_1) instr_1 = { NMEM, NALU, C1, NDIS }; else instr_1 = { NMEM, NALU, C0, NDIS }; clockem; // 0: south & out = in_2 + c // 1: north & out = in_2 + c instr_0 = { NMEM, RR, SUM12S, RS, SUM12S, CN, NDIS }; instr_1 = { NMEM, RR, SUM12S, RN, SUM12S, CN, NDIS }; clockem; // 0: out -> in_2, east = south // 1: out -> in_2, west = north instr_0 = { `M 0, OUT, F, T, F, RS, SRC, RE, ZEROS, CN, NDIS }; instr_1 = { `M 0, OUT, F, T, F, RN, SRC, RW, ZEROS, CN, NDIS }; clockem; // 0: west = in_2 // 1: east = in_2 instr_0 = { NMEM, R0, IN2, RW, ZEROS, CN, NDIS }; instr_1 = { NMEM, R0, IN2, RE, ZEROS, CN, NDIS }; clockem; // 0: west = west rotated right // 1: do nothing instr_0 = { NMEM, RW, SRC, RW, ZEROS, CN, NDIS }; instr_1 = { NOOP }; clockem; // 0: check for upcoming overflow possibility, // east & out = east shifted left with 0 // 1: out = in_2 if (e_out_0) op_ov_0 = 1; else op_ov_0 = 0; instr_0 = { NMEM, RE, SRC, RE, SRC, CN, NDIS }; 
instr_1 = { NMEM, R0, ZEROS, R0, IN2, CN, NDIS }; clockem; op_ov_1 = 0; overflow = 0; repeat (WORD_WIDTH/2) begin // both: out -> in_2, west = west rotated right, clear out instr_0 = { `M 0, OUT, F, T, F, RW, SRC, RW, ZEROS, CN, NDIS }; instr_1 = { `M 0, OUT, F, T, F, RW, SRC, RW, ZEROS, CN, NDIS }; clockem; // 0: if op bit = 1, compute carry for in_1 + in_2, south & out // = in_1 + in_2, check overflow; otherwise, south & out = in_1 // 1: if op bit = 1, compute carry for in_1 + in_2, north & out // = in_1 + in_2, check overflow; otherwise, north & out = in_1 if (flags_0[REG_MSB_F]) condition_0 = 1; else condition_0 = 0; if (flags_1[REG_MSB_F]) condition_1 = 1; else condition_1 = 0; if (condition_0) instr_0 = { NMEM, NALU, C0, NDIS }; else instr_0 = { NOOP }; if (condition_1) instr_1 = { NMEM, NALU, C0, NDIS }; else instr_1 = { NOOP }; clockem; if (condition_0) instr_0 = { NMEM, RR, SUM12S, RS, SUM12S, CN, NDIS }; else instr_0 = { NMEM, RR, IN1, RS, IN1, CN, NDIS }; if (condition_1) instr_1 = { NMEM, RR, SUM12S, RN, SUM12S, CN, NDIS }; else instr_1 = { NMEM, RR, IN1, RN, IN1, CN, NDIS }; clockem; if ( condition_0 && (flags_0[CARRY_F] || op_ov_0) || condition_1 && (flags_1[CARRY_F] || op_ov_1) ) overflow = 1; // both: out -> in_1, west = west rotated right instr_0 = { `M 0, OUT, F, F, T, RW, SRC, RW, ZEROS, CN, NDIS }; instr_1 = { `M 0, OUT, F, F, T, RW, SRC, RW, ZEROS, CN, NDIS }; clockem; // both: east = east shifted left with zero instr_0 = { NMEM, RE, SRC, RE, ZEROS, CN, NDIS }; instr_1 = { NMEM, RE, SRC, RE, ZEROS, CN, NDIS }; clockem; // both: check for upcoming overflow possibility, // east & out = east shifted left with zero if (e_out_0) op_ov_0 = 1; if (e_out_1) op_ov_1 = 1; instr_0 = { NMEM, RE, SRC, RE, SRC, CN, NDIS }; instr_1 = { NMEM, RE, SRC, RE, SRC, CN, NDIS }; clockem; end // 0: out = south // 1: out = north instr_0 = { NMEM, RS, ZEROS, R0, SRC, CN, NDIS }; instr_1 = { NMEM, RN, ZEROS, R0, SRC, CN, NDIS }; clockem; // both: out -> in_2, clear out 
instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; clockem; // both: compute carry for in_1 + in_2 instr_0 = { NMEM, NALU, C0, NDIS }; instr_1 = { NMEM, NALU, C0, NDIS }; clockem; // both: out = in_1 + in_2, clear in_2, check overflow instr_0 = { `M 0, OUT, F, T, F, RR, ZEROS, R0, SUM12S, CN, NDIS }; instr_1 = { `M 0, OUT, F, T, F, RR, ZEROS, R0, SUM12S, CN, NDIS }; clockem; if (flags_0[CARRY_F] || flags_1[CARRY_F]) overflow = 1; if (negative) begin // negate and save result // both: out -> in_1 instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS }; instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS }; clockem; // both: out = ~in_1 instr_0 = { NMEM, R0, SRC, R0, NIN1, CN, NDIS }; instr_1 = { NMEM, R0, SRC, R0, NIN1, CN, NDIS }; clockem; // both: out -> in_1 instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS }; instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS }; clockem; // both: compute carry for in_1 + 1 instr_0 = { NMEM, NALU, C1, NDIS }; instr_1 = { NMEM, NALU, C1, NDIS }; clockem; // both: out = in_1 + c instr_0 = { NMEM, RR, ZEROS, R0, SUM12S, CN, NDIS }; instr_1 = { NMEM, RR, ZEROS, R0, SUM12S, CN, NDIS }; clockem; // both: out -> mem[2] instr_0 = { `M 2, OUT, T, F, F, NALU, CN, NDIS }; instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS }; clockem; end else // positive begin // save result // both: out -> mem[2] instr_0 = { `M 2, OUT, T, F, F, NALU, CN, NDIS }; instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS }; clockem; // both: do nothing repeat (5) begin instr_0 = { NOOP }; instr_1 = { NOOP }; clockem; end end r_0_dump; m_0_dump; $display("unsigned: %0d x %0d = %0d, overflow = %0d", pe_0.mem[0], pe_0.mem[1], pe_0.mem[2], overflow); form_feed; r_1_dump; m_1_dump; $display("unsigned: %0d x %0d = %0d, overflow = %0d", pe_1.mem[0], pe_1.mem[1], pe_1.mem[2], overflow); $display("%0d clock cycles", clock_ct); end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task clockem; begin #NS clock = 1; #NS clock = 0; clock_ct 
= clock_ct + 1; end endtask task r_0_dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; end endtask task m_0_dump; begin #NS dump_m_0 = 1; #NS dump_m_0 = 0; end endtask task r_1_dump; begin #NS dump_r_1 = 1; #NS dump_r_1 = 0; end endtask task m_1_dump; begin #NS dump_m_1 = 1; #NS dump_m_1 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc04-.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:13:20 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc04-.v" Highest level modules: pe0 pe2 pe3 pe4 pc4 2's-complement Integer 1x1=1-word Multiply with 2 PE's PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 115 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 1 0 disab carry (r) 0 0 IN1: 10101010101010100000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 
10101010101010100000000000000000 (r) S_IN: 01010101010101010000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 10000000000000001111111111111111 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 115 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx unsigned: 131071 x 65536 = 4294901760, overflow = 1 PE "PE_1" Port and Register Dump # 0, Clock Cycle # 115 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 0 disab carry (r) 0 0 IN1: 01010101010101010000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 01010101010101010000000000000000 (r) N_IN: 10101010101010100000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 00000000000000011111111111111111 (r) PE "PE_1" Memory Dump # 0, Clock Clycle # 115 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx unsigned: 131071 x 65536 = 4294901760, overflow = 1 115 clock 
cycles 38 warnings 31051 simulation events CPU time: 1.3 secs to compile + 0.6 secs to link + 4.8 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:13:27
module pc5; // pc05-2.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 2's-complement integer shift-add 1x1=1 word multiply with 2 PE's
// (using variable-shift EAST & WEST registers)
// (highest level module; requires module pe2 in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Overview of this module (a self-contained testbench; it has no ports):
//   - instantiates two processing elements, pe_0 and pe_1, of type pe2;
//   - preloads mem[0] and mem[1] of both PEs with the two operands;
//   - drives a fixed sequence of PE instructions from the "initial" block,
//     one instruction pair (instr_0, instr_1) per call of task "clockem",
//     sampling the PE flag outputs between clocks to choose the next
//     instruction;
//   - stores the result in mem[2] of both PEs, triggers the register and
//     memory dumps, and prints the operands, the result, the overflow
//     indication and the number of clock cycles used.

// system parameters:
`define ADDR_WIDTH 4 // width of PE memory address, in bits
parameter
  WORD_WIDTH = 32,   // width of PE word, in bits
  MEM_LENGTH = 16,   // length of PE memory, in words
  MHZ = 16.7,        // clock speed, in "megahertz"
  NS = 500/MHZ,      // clock half-period, in "nanoseconds"
  ADDR_WIDTH = `ADDR_WIDTH;
  // Had to use a "`define" above to
  // circumnavigate a Verilog-XL bug that
  // prevents parameters from working as
  // bit length specifiers!!!

// variable declarations:
// clock, reset             - driven by tasks clockem / reset_regs
// dump_r_N, dump_m_N       - register / memory dump strobes for PE N
// condition_N              - flag bit sampled from PE N, selects next instr
// negative                 - set when exactly one operand had its m.s.b. set
// init, init_add_0,
// last_e_out_1             - bookkeeping for the first-iteration overflow check
// op_ov_N                  - sticky: a 1 has been seen on e_out of PE N
// overflow                 - sticky overflow indication that is printed
reg clock, reset, dump_r_0, dump_m_0, dump_r_1, dump_m_1, condition_0, condition_1, negative, init, init_add_0, last_e_out_1, op_ov_0, op_ov_1, overflow;
reg[5:0] shift;                            // holds the sized "shift" field value
reg [ADDR_WIDTH+40-1:0] instr_0, instr_1;  // one instruction word per PE
wire e_out_0, w_0, e_out_1, w_1;
wire [9:0] flags_0, flags_1;               // PE flag outputs
wire [WORD_WIDTH-1:0] s0_n1, n1_s0;        // word links between pe_0 and pe_1
integer clock_ct;                          // counts calls of clockem

// PE instances and connections:
// NOTE(review): connections are positional and pe2's port list is not shown
// in this listing; presumably it matches pe0's order (p_clock, p_reset,
// p_instr, p_flags, n_in, s_in, e_in, w_in, r_in, n_out, s_out, e_out,
// w_out, r_out, m_clock, m_write, m_addr, m_in, m_out, dump_reg, dump_mem)
// -- confirm against pe.v.  Under that assumption s0_n1 carries pe_0's
// south output to pe_1's north input, n1_s0 carries pe_1's north output to
// pe_0's south input, and w_N ties each PE's w_out back to its own w_in.
pe2 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, 0, w_0, 0, , s0_n1, e_out_0, w_0, , 0,,,,, dump_r_0, dump_m_0);
pe2 pe_1 (clock, reset, instr_1, flags_1, s0_n1, 0, 0, w_1, 0, n1_s0, , e_out_1, w_1, , 0,,,,, dump_r_1, dump_m_1);

defparam // set PE-instance parameters to system parameters:
  pe_0.ADDR_WIDTH = ADDR_WIDTH,
  pe_0.WORD_WIDTH = WORD_WIDTH,
  pe_0.MEM_LENGTH = MEM_LENGTH,
  pe_0.PE_NAME = "PE_0",
  pe_1.ADDR_WIDTH = ADDR_WIDTH,
  pe_1.WORD_WIDTH = WORD_WIDTH,
  pe_1.MEM_LENGTH = MEM_LENGTH,
  pe_1.PE_NAME = "PE_1";

// PE instruction fields and bit widths:
//   mb_addr      mb_srce mb_d_mem mb_d_in2 mb_d_in1 src_reg shift ...
//   (ADDR_WIDTH) 1       1        1        1        4       6
//   reg_res dest_reg out_res carry alu_dis alu_dis_s mb_dis mb_dis_s
//   8       4        8       2     1       1         1      1
// (The fixed-width fields add up to 40 bits, which together with mb_addr
// gives the ADDR_WIDTH+40 bits declared for instr_0 / instr_1.)
parameter // PE instruction field values:
  // mb_addr: memory bus transfer address
  // mb_srce: source of memory bus transfer is ...
  OUT = 1'b0, // ... register "OUT"
  MEM = 1'b1, // ... memory
  // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
  F = 1'b0, // false
  T = 1'b1, // true
  // srce_reg & dest_reg: destination and source registers of ALU operations
  R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
  R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
  R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
  RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,
  // shift: shift east or west source register "shift" bits
  // reg_res & out_res: ALU operands for "OUT" and destination register results
  // (The named codes are bitwise combinations of the IN1, IN2 and SRC codes,
  // e.g. AND12 = IN1 & IN2 and SUM12S = IN1 ^ IN2 ^ SRC; how the PE decodes
  // them is defined in pe.v.)
  IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010,
  NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101,
  ZEROS = 8'b00000000, ONES = 8'b11111111,
  AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100,
  NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011,
  AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010,
  NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101,
  AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110,
  NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001,
  AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110,
  NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
  SUM12S = 8'b10010110,
  // carry:
  // Note: The PE only does EITHER a carry operation OR an ALU operation in
  // one instruction cycle. A carry word is computed using registers IN_1
  // and IN_2 and the specified carry-in, and is made available at the input
  // from the router, r_in. The carry-out is placed in the carry flag of the
  // p_flags register.
  CN = 2'b00, // do not compute carry (do an ALU operation)
  CC = 2'b01, // compute carry using carry flag for carry-in
  C0 = 2'b10, // compute carry using 0 for carry-in
  C1 = 2'b11; // compute carry using 1 for carry-in
  // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
  // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

// convenience parameters and definitions for writing PE instructions:
`define M `ADDR_WIDTH'h // when specifying memory address (in hexadecimal)
parameter NMEM = { `M 0, MEM, F, F, F }; // when not using memory bus
`define SH 6'd // when specifying shift (in decimal)
parameter NALU = { R0, `SH 0, SRC, R0, ZEROS }; // when not doing ALU ops
  // (note that flags are affected and OUT is cleared)
parameter NDIS = { F, F, F, F }; // when not allowing any disabling
parameter NOOP = { NMEM, NALU, CN, NDIS }; // when squandering clock cycles
  // (note that flags are affected and OUT is cleared)

parameter // bit positions of PE flags
  // Note that flags other than carry are not affected by a carry operation,
  // and the carry flag is not affected by an ALU operation.
  REG_MSB_F = 9, // m.s.b. of destination register
  REG_ZER_F = 8, // if destination register = 0
  OUT_MSB_F = 7, // m.s.b. of register OUT
  OUT_ZER_F = 6, // if register OUT = 0
  IN2_MSB_F = 5, // m.s.b. of register IN_2
  IN2_ZER_F = 4, // if register IN_2 = 0
  IN1_MSB_F = 3, // m.s.b. of register IN_1
  IN1_ZER_F = 2, // if register IN_1 = 0
  DISABLE_F = 1, // state of PE disable bit
  CARRY_F = 0;   // result of last carry operation

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!
initial
begin
  form_feed;
  $display("2's-complement Integer 1x1=1-word Multiply with 2 PE's");

  // ---- testbench state and operand setup --------------------------------
  clock = 0;
  clock_ct = 0;
  dump_r_0 = 0;
  dump_m_0 = 0;
  dump_r_1 = 0;
  dump_m_1 = 0;
  // operands are written straight into PE memory through hierarchical
  // references; pe_1 receives copies of pe_0's two words
  pe_0.mem[0] = 32'h10000;
  pe_0.mem[1] = 32'h1ffff;
  pe_1.mem[0] = pe_0.mem[0];
  pe_1.mem[1] = pe_0.mem[1];

  // both: clear all registers
  reset_regs;

  // ---- load one operand per PE and strip its sign ------------------------
  // 0: mem[0] -> in_2
  // 1: mem[1] -> in_2
  instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
  instr_1 = { `M 1, MEM, F, T, F, NALU, CN, NDIS };
  clockem;

  // both: if in_2 negative, then out = ~in_2, in_2 = out, compute carry for
  // in_2 + 1; otherwise in_2 positive, so compute carry for in_2 + 0
  if (flags_0[IN2_MSB_F]) condition_0 = 1; else condition_0 = 0;
  if (flags_1[IN2_MSB_F]) condition_1 = 1; else condition_1 = 0;
  // remember whether exactly one operand was negative; used after the
  // final sum to decide whether the result is negated before being stored
  if (condition_0^condition_1) negative = 1; else negative = 0;
  if (condition_0) instr_0 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS }; else instr_0 = { NOOP };
  if (condition_1) instr_1 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS }; else instr_1 = { NOOP };
  clockem;
  if (condition_0) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; else instr_0 = { NOOP };
  if (condition_1) instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS }; else instr_1 = { NOOP };
  clockem;
  if (condition_0) instr_0 = { NMEM, NALU, C1, NDIS }; else instr_0 = { NMEM, NALU, C0, NDIS };
  if (condition_1) instr_1 = { NMEM, NALU, C1, NDIS }; else instr_1 = { NMEM, NALU, C0, NDIS };
  clockem;

  // ---- exchange operands between the PEs and pre-position them -----------
  // 0: south & out = in_2 + c
  // 1: north & out = in_2 + c
  instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
  instr_1 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
  clockem;

  // 0: out -> in_2, east = south
  // 1: out -> in_2, west = north
  instr_0 = { `M 0, OUT, F, T, F, RS, `SH 0, SRC, RE, ZEROS, CN, NDIS };
  instr_1 = { `M 0, OUT, F, T, F, RN, `SH 0, SRC, RW, ZEROS, CN, NDIS };
  clockem;

  // 0: west = in_2
  // 1: east = in_2
  instr_0 = { NMEM, R0, `SH 0, IN2, RW, ZEROS, CN, NDIS };
  instr_1 = { NMEM, R0, `SH 0, IN2, RE, ZEROS, CN, NDIS };
  clockem;

  // 0: west = west shifted right a half word
  // 1: do nothing
  shift = WORD_WIDTH/2;
  // Note must use a register because a "parameter" cannot be created that
  // is a sized value of an expression, and a "`define" cannot be created
  // that is a value of an expression!!!
  instr_0 = { NMEM, RW, shift, SRC, RW, ZEROS, CN, NDIS };
  instr_1 = { NOOP };
  clockem;

  // 0: east & out = east shifted left a half word with 0's
  // 1: out = in_2
  instr_0 = { NMEM, RE, shift, SRC, RE, SRC, CN, NDIS };
  instr_1 = { NMEM, R0, `SH 0, ZEROS, R0, IN2, CN, NDIS };
  clockem;

  // ---- shift-add loop: WORD_WIDTH/2 iterations, 4 clocks each ------------
  init = 1;        // marks the first loop iteration
  init_add_0 = 0;
  op_ov_0 = 0;
  op_ov_1 = 0;
  overflow = 0;
  repeat (WORD_WIDTH/2)
  begin
    // both: out -> in_2, west = west rotated right, clear out
    instr_0 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    instr_1 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    clockem;

    // 0: if op bit = 1, compute carry for in_1 + in_2, south & out
    //    = in_1 + in_2, check overflow; otherwise, south & out = in_1
    // 1: if op bit = 1, compute carry for in_1 + in_2, north & out
    //    = in_1 + in_2, check overflow; otherwise, north & out = in_1
    if (flags_0[REG_MSB_F]) condition_0 = 1; else condition_0 = 0;
    if (flags_1[REG_MSB_F]) condition_1 = 1; else condition_1 = 0;
    if (condition_0) instr_0 = { NMEM, NALU, C0, NDIS }; else instr_0 = { NOOP };
    if (condition_1) instr_1 = { NMEM, NALU, C0, NDIS }; else instr_1 = { NOOP };
    clockem;
    if (condition_0) instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS }; else instr_0 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
    if (condition_1) instr_1 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS }; else instr_1 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
    clockem;
    // an add was performed in a PE and either its carry flag is set or a 1
    // was previously seen on that PE's e_out: flag overflow (sticky)
    if ( condition_0 && (flags_0[CARRY_F] || op_ov_0) || condition_1 && (flags_1[CARRY_F] || op_ov_1) ) overflow = 1;
    // record whether pe_0 added during the very first iteration
    if ( init && condition_0 ) init_add_0 = 1;
    init = 0;

    // check for upcoming overflow possibility
    // both: out -> in_1, east & out = east shifted left with zero
    // (e_out_N is sampled BEFORE the shift instruction below is clocked)
    if (e_out_0) op_ov_0 = 1;
    if (e_out_1) op_ov_1 = 1;
    last_e_out_1 = e_out_1;
    instr_0 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    instr_1 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    clockem;
  end

  // ---- combine the two partial results ------------------------------------
  // check for overflow from first iteration
  // 0: out = south
  // 1: out = north
  if (init_add_0 && last_e_out_1) overflow = 1;
  instr_0 = { NMEM, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  instr_1 = { NMEM, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  clockem;

  // both: out -> in_2, clear out
  instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  clockem;

  // both: compute carry for in_1 + in_2
  instr_0 = { NMEM, NALU, C0, NDIS };
  instr_1 = { NMEM, NALU, C0, NDIS };
  clockem;

  // both: out = in_1 + in_2, clear in_2
  // check overflow
  instr_0 = { `M 0, OUT, F, T, F, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
  instr_1 = { `M 0, OUT, F, T, F, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
  clockem;
  if ( flags_0[CARRY_F] || flags_1[CARRY_F] ) overflow = 1;

  // ---- restore the sign and store the result ------------------------------
  // Both branches below issue six clockem calls, so the reported clock
  // count does not depend on the sign of the result.
  if (negative)
  begin // negate and save result
    // both: out -> in_1
    instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;

    // both: out = ~in_1
    instr_0 = { NMEM, R0, `SH 0, SRC, R0, NIN1, CN, NDIS };
    instr_1 = { NMEM, R0, `SH 0, SRC, R0, NIN1, CN, NDIS };
    clockem;

    // both: out -> in_1
    instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;

    // both: compute carry for in_1 + 1
    instr_0 = { NMEM, NALU, C1, NDIS };
    instr_1 = { NMEM, NALU, C1, NDIS };
    clockem;

    // both: out = in_1 + c
    instr_0 = { NMEM, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
    instr_1 = { NMEM, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
    clockem;

    // both: out -> mem[2]
    instr_0 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    clockem;
  end
  else // positive
  begin // save result
    // both: out -> mem[2]
    instr_0 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    clockem;

    // both: do nothing 5 times
    instr_0 = { NOOP };
    instr_1 = { NOOP };
    clockem;
    clockem;
    clockem;
    clockem;
    clockem;
  end

  // ---- report -------------------------------------------------------------
  // pulse the dump strobes of each PE, then print that PE's operands and
  // result; the "%0d" format prints the memory words as unsigned decimals
  r_0_dump;
  m_0_dump;
  $display("unsigned: %0d x %0d = %0d, overflow = %0d", pe_0.mem[0], pe_0.mem[1], pe_0.mem[2], overflow);
  form_feed;
  r_1_dump;
  m_1_dump;
  $display("unsigned: %0d x %0d = %0d, overflow = %0d", pe_1.mem[0], pe_1.mem[1], pe_1.mem[2], overflow);
  $display("%0d clock cycles", clock_ct);
end

// Pulse the shared reset line: high after one half-period, low after another.
task reset_regs;
begin
  #NS reset = 1;
  #NS reset = 0;
end
endtask

// Issue one PE clock pulse (one full period of 2*NS) and count it.
task clockem;
begin
  #NS clock = 1;
  #NS clock = 0;
  clock_ct = clock_ct + 1;
end
endtask

// Pulse the register-dump strobe of pe_0.
task r_0_dump;
begin
  #NS dump_r_0 = 1;
  #NS dump_r_0 = 0;
end
endtask

// Pulse the memory-dump strobe of pe_0.
task m_0_dump;
begin
  #NS dump_m_0 = 1;
  #NS dump_m_0 = 0;
end
endtask

// Pulse the register-dump strobe of pe_1.
task r_1_dump;
begin
  #NS dump_r_1 = 1;
  #NS dump_r_1 = 0;
end
endtask

// Pulse the memory-dump strobe of pe_1.
task m_1_dump;
begin
  #NS dump_m_1 = 1;
  #NS dump_m_1 = 0;
end
endtask

// Write a form-feed character (octal 14) to the output to start a new page.
task form_feed;
  $write("\14");
endtask

endmodule
Host command: verilog Command arguments: pe.v pc05-2.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:13:33 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc.
* Compiling source file "pe.v" Compiling source file "pc05-2.v" Highest level modules: pe0 pe pe3 pe4 pc5 2's-complement Integer 1x1=1-word Multiply with 2 PE's PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 83 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 1 0 disab carry (r) 0 0 IN1: 11111111111111110000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 11111111111111110000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000010000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 83 0: 00010000 0001ffff ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx unsigned: 65536 x 131071 = 4294901760, overflow = 1 PE "PE_1" Port and Register Dump # 0, Clock Cycle # 83 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i 
(p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 11111111111111110000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 11111111111111110000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 00000000000000000000000000000001 (r) PE "PE_1" Memory Dump # 0, Clock Clycle # 83 0: 00010000 0001ffff ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx unsigned: 65536 x 131071 = 4294901760, overflow = 1 83 clock cycles 38 warnings 23415 simulation events CPU time: 1.4 secs to compile + 0.5 secs to link + 3.6 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:13:39

module pc6;

// pc06-2.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 2's-complement integer shift-add 1x1=1 word multiply with 4 PE's
// (using variable-shift EAST & WEST registers)
// (highest level module; requires module pe2 in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Overview of this listing:
//   - Four pe2 instances (pe_0 .. pe_3) are chained through their north/south
//     word ports; each PE's w_out is fed back to its own w_in.
//   - The "initial" block is the control program: it loads the two operands
//     from mem[0] and mem[1] of every PE, builds one instruction per PE per
//     step, and clocks all PEs together with the "clockem" task.
//   - Only PE_1 and PE_2 write a result, to their mem[2]; the run ends by
//     dumping all four PEs and printing the operands, result, overflow
//     indication and clock-cycle count.
//   - This module has no ports; it is a self-contained simulation top level.

// system parameters:
`define ADDR_WIDTH 4          // width of PE memory address, in bits
parameter
  WORD_WIDTH = 32,            // width of PE word, in bits
  MEM_LENGTH = 16,            // length of PE memory, in words
  MHZ = 16.7,                 // clock speed, in "megahertz"
  NS = 500/MHZ,               // clock half-period, in "nanoseconds"
  ADDR_WIDTH = `ADDR_WIDTH;   // Had to use a "`define" above to
                              // circumnavigate a Verilog-XL bug that
                              // prevents parameters from working as
                              // bit length specifiers!!!

// variable declarations:
reg
  clock, reset,
  dump_r_0, dump_m_0, dump_r_1, dump_m_1,
  dump_r_2, dump_m_2, dump_r_3, dump_m_3,
  condition_0, condition_1, condition_2, condition_3,
  negative, init,
  init_add_0, init_add_2, init_add_3,
  last_e_out_0, last_e_out_1, last_e_out_2,
  op_ov_0, op_ov_1, op_ov_2, op_ov_3,
  overflow;
reg [5:0]
  sh1_4th, sh2_4th, sh3_4th;   // shift counts of 1/4, 2/4 and 3/4 word
reg [ADDR_WIDTH+40-1:0]
  instr_0, instr_1, instr_2, instr_3;   // one instruction word per PE
wire
  e_out_0, w_0, e_out_1, w_1, e_out_2, w_2, e_out_3, w_3;
wire [9:0]
  flags_0, flags_1, flags_2, flags_3;
wire [WORD_WIDTH-1:0]
  n1_s0, s0_n1, n2_s1, s1_n2, n3_s2, s2_n3;   // north/south links between
                                              // neighbouring PEs
integer
  clock_ct;                    // number of PE clock cycles issued so far

// PE instances and connections:
// (positional connections; the port order is presumably that of the PE
// modules in pe.v -- confirm against the pe2 port list)
pe2 pe_0 (clock, reset, instr_0, flags_0,
          0, n1_s0, 0, w_0, 0,
          , s0_n1, e_out_0, w_0, ,
          0,,,,, dump_r_0, dump_m_0);
pe2 pe_1 (clock, reset, instr_1, flags_1,
          s0_n1, n2_s1, 0, w_1, 0,
          n1_s0, s1_n2, e_out_1, w_1, ,
          0,,,,, dump_r_1, dump_m_1);
pe2 pe_2 (clock, reset, instr_2, flags_2,
          s1_n2, n3_s2, 0, w_2, 0,
          n2_s1, s2_n3, e_out_2, w_2, ,
          0,,,,, dump_r_2, dump_m_2);
pe2 pe_3 (clock, reset, instr_3, flags_3,
          s2_n3, 0, 0, w_3, 0,
          n3_s2, , e_out_3, w_3, ,
          0,,,,, dump_r_3, dump_m_3);

defparam // set PE-instance parameters to system parameters:
  pe_0.ADDR_WIDTH = ADDR_WIDTH, pe_0.WORD_WIDTH = WORD_WIDTH,
  pe_0.MEM_LENGTH = MEM_LENGTH, pe_0.PE_NAME = "PE_0",
  pe_1.ADDR_WIDTH = ADDR_WIDTH, pe_1.WORD_WIDTH = WORD_WIDTH,
  pe_1.MEM_LENGTH = MEM_LENGTH, pe_1.PE_NAME = "PE_1",
  pe_2.ADDR_WIDTH = ADDR_WIDTH, pe_2.WORD_WIDTH = WORD_WIDTH,
  pe_2.MEM_LENGTH = MEM_LENGTH, pe_2.PE_NAME = "PE_2",
  pe_3.ADDR_WIDTH = ADDR_WIDTH, pe_3.WORD_WIDTH = WORD_WIDTH,
  pe_3.MEM_LENGTH = MEM_LENGTH, pe_3.PE_NAME = "PE_3";

// PE instruction fields and bit widths:
//    mb_addr     mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift ...
//  (ADDR_WIDTH)     1       1        1        1        4       6
//  reg_res dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i
//     8       4        8      2      1        1       1       1
// (total width = ADDR_WIDTH + 40 bits, matching the instr_N registers)

parameter // PE instruction field values:

  // mb_addr: memory bus transfer address

  // mb_srce: source of memory bus transfer is ...
  OUT = 1'b0,   // ... register "OUT"
  MEM = 1'b1,   // ... memory

  // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
  F = 1'b0,     // false
  T = 1'b1,     // true

  // srce_reg & dest_reg: source and destination registers of ALU operations
  R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
  R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
  R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
  RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,

  // shift: shift east or west source register "shift" bits

  // reg_res & out_res: ALU operands for "OUT" and destination register results
  IN1     = 8'b11110000, IN2     = 8'b11001100, SRC     = 8'b10101010,
  NIN1    = 8'b00001111, NIN2    = 8'b00110011, NSRC    = 8'b01010101,
  ZEROS   = 8'b00000000, ONES    = 8'b11111111,
  AND12   = 8'b11000000, OR12    = 8'b11111100, XOR12   = 8'b00111100,
  NAND12  = 8'b00111111, NOR12   = 8'b00000011, NXOR12  = 8'b11000011,
  AND1S   = 8'b10100000, OR1S    = 8'b11111010, XOR1S   = 8'b01011010,
  NAND1S  = 8'b01011111, NOR1S   = 8'b00000101, NXOR1S  = 8'b10100101,
  AND2S   = 8'b10001000, OR2S    = 8'b11101110, XOR2S   = 8'b01100110,
  NAND2S  = 8'b01110111, NOR2S   = 8'b00010001, NXOR2S  = 8'b10011001,
  AND12S  = 8'b10000000, OR12S   = 8'b11111110, XOR12S  = 8'b01111110,
  NAND12S = 8'b01111111, NOR12S  = 8'b00000001, NXOR12S = 8'b10000001,
  SUM12S  = 8'b10010110,

  // carry:
  // Note: The PE only does EITHER a carry operation OR an ALU operation in
  // one instruction cycle. A carry word is computed using registers IN_1
  // and IN_2 and the specified carry-in, and is made available at the input
  // from the router, r_in. The carry-out is placed in the carry flag of the
  // p_flags register.
  CN = 2'b00,   // do not compute carry (do an ALU operation)
  CC = 2'b01,   // compute carry using carry flag for carry-in
  C0 = 2'b10,   // compute carry using 0 for carry-in
  C1 = 2'b11;   // compute carry using 1 for carry-in

  // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
  // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

// convenience parameters and definitions for writing PE instructions:
`define M `ADDR_WIDTH'h   // when specifying memory address (in hexadecimal)
parameter NMEM = { `M 0, MEM, F, F, F };   // when not using memory bus
`define SH 6'd            // when specifying shift (in decimal)
parameter NALU = { R0, `SH 0, SRC, R0, ZEROS };   // when not doing ALU ops
  // (note that flags are affected and OUT is cleared)
parameter NDIS = { F, F, F, F };   // when not allowing any disabling
parameter NOOP = { NMEM, NALU, CN, NDIS };   // when squandering clock cycles
  // (note that flags are affected and OUT is cleared)

parameter // bit positions of PE flags
  // Note that flags other than carry are not affected by a carry operation,
  // and the carry flag is not affected by an ALU operation.
  REG_MSB_F = 9,   // m.s.b. of destination register
  REG_ZER_F = 8,   // if destination register = 0
  OUT_MSB_F = 7,   // m.s.b. of register OUT
  OUT_ZER_F = 6,   // if register OUT = 0
  IN2_MSB_F = 5,   // m.s.b. of register IN_2
  IN2_ZER_F = 4,   // if register IN_2 = 0
  IN1_MSB_F = 3,   // m.s.b. of register IN_1
  IN1_ZER_F = 2,   // if register IN_1 = 0
  DISABLE_F = 1,   // state of PE disable bit
  CARRY_F   = 0;   // result of last carry operation

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!

initial
begin
  form_feed;
  // Banner names the number of PEs this configuration actually drives (4).
  // It previously read "2 PE's", carried over from the 2-PE configuration;
  // simulation logs recorded before this correction show the old text.
  $display("2's-complement Integer 1x1=1-word Multiply with 4 PE's");

  clock = 0;
  clock_ct = 0;
  dump_r_0 = 0; dump_m_0 = 0;
  dump_r_1 = 0; dump_m_1 = 0;
  dump_r_2 = 0; dump_m_2 = 0;
  dump_r_3 = 0; dump_m_3 = 0;

  // operands: every PE gets the same two words in mem[0] and mem[1]
  pe_0.mem[0] = 32'h1ffff;
  pe_0.mem[1] = 32'h10000;
  pe_1.mem[0] = pe_0.mem[0];
  pe_1.mem[1] = pe_0.mem[1];
  pe_2.mem[0] = pe_0.mem[0];
  pe_2.mem[1] = pe_0.mem[1];
  pe_3.mem[0] = pe_0.mem[0];
  pe_3.mem[1] = pe_0.mem[1];

  // all: clear all registers
  reset_regs;

  // 0: mem[0] -> in_2
  // 1: mem[1] -> in_2
  // 2: mem[0] -> in_2
  // 3: mem[1] -> in_2
  instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
  instr_1 = { `M 1, MEM, F, T, F, NALU, CN, NDIS };
  instr_2 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
  instr_3 = { `M 1, MEM, F, T, F, NALU, CN, NDIS };
  clockem;

  // all: if in_2 negative, then out = ~in_2, in_2 = out, compute carry for
  // in_2 + 1; otherwise in_2 positive, so compute carry for in_2 + 0
  // (the signs are sampled from PE_0 and PE_1 only; PE_2 reuses
  // condition_0 and PE_3 reuses condition_1 because they hold the same
  // operands)
  if (flags_0[IN2_MSB_F]) condition_0 = 1; else condition_0 = 0;
  if (flags_1[IN2_MSB_F]) condition_1 = 1; else condition_1 = 0;
  // result is negative when exactly one operand is negative
  if (condition_0^condition_1) negative = 1; else negative = 0;

  if (condition_0) instr_0 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
  else             instr_0 = { NOOP };
  if (condition_1) instr_1 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
  else             instr_1 = { NOOP };
  if (condition_0) instr_2 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
  else             instr_2 = { NOOP };
  if (condition_1) instr_3 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
  else             instr_3 = { NOOP };
  clockem;

  if (condition_0) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  else             instr_0 = { NOOP };
  if (condition_1) instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  else             instr_1 = { NOOP };
  if (condition_0) instr_2 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  else             instr_2 = { NOOP };
  if (condition_1) instr_3 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  else             instr_3 = { NOOP };
  clockem;

  if (condition_0) instr_0 = { NMEM, NALU, C1, NDIS };
  else             instr_0 = { NMEM, NALU, C0, NDIS };
  if (condition_1) instr_1 = { NMEM, NALU, C1, NDIS };
  else             instr_1 = { NMEM, NALU, C0, NDIS };
  if (condition_0) instr_2 = { NMEM, NALU, C1, NDIS };
  else             instr_2 = { NMEM, NALU, C0, NDIS };
  if (condition_1) instr_3 = { NMEM, NALU, C1, NDIS };
  else             instr_3 = { NMEM, NALU, C0, NDIS };
  clockem;

  // 0 & 2: south & out = in_2 + c
  // 1 & 3: north & out = in_2 + c
  instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
  instr_1 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
  instr_2 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
  instr_3 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
  clockem;

  // 0 & 2: out -> in_2, east = south
  // 1 & 3: out -> in_2, west = north
  instr_0 = { `M 0, OUT, F, T, F, RS, `SH 0, SRC, RE, ZEROS, CN, NDIS };
  instr_1 = { `M 0, OUT, F, T, F, RN, `SH 0, SRC, RW, ZEROS, CN, NDIS };
  instr_2 = { `M 0, OUT, F, T, F, RS, `SH 0, SRC, RE, ZEROS, CN, NDIS };
  instr_3 = { `M 0, OUT, F, T, F, RN, `SH 0, SRC, RW, ZEROS, CN, NDIS };
  clockem;

  // 0 & 2: west = in_2
  // 1 & 3: east = in_2
  instr_0 = { NMEM, R0, `SH 0, IN2, RW, ZEROS, CN, NDIS };
  instr_1 = { NMEM, R0, `SH 0, IN2, RE, ZEROS, CN, NDIS };
  instr_2 = { NMEM, R0, `SH 0, IN2, RW, ZEROS, CN, NDIS };
  instr_3 = { NMEM, R0, `SH 0, IN2, RE, ZEROS, CN, NDIS };
  clockem;

  // 0: west = west shifted right a half word
  // 1: do nothing
  // 2: west = west shifted right a quarter word
  // 3: west = west shifted right three-quarters word
  sh1_4th = WORD_WIDTH/4;
  sh2_4th = WORD_WIDTH/2;
  sh3_4th = 3*WORD_WIDTH/4;
  // Note must use registers because a "parameter" cannot be created that
  // is a sized value of an expression, and a "`define" cannot be created
  // that is a value of an expression!!!
  instr_0 = { NMEM, RW, sh2_4th, SRC, RW, ZEROS, CN, NDIS };
  instr_1 = { NOOP };
  instr_2 = { NMEM, RW, sh1_4th, SRC, RW, ZEROS, CN, NDIS };
  instr_3 = { NMEM, RW, sh3_4th, SRC, RW, ZEROS, CN, NDIS };
  clockem;

  // 0: east & out = east shifted left a half word with 0's
  // 1: out = in_2
  // 2: east & out = east shifted left a quarter word with 0's
  // 3: east & out = east shifted left three-quarters word with 0's
  instr_0 = { NMEM, RE, sh2_4th, SRC, RE, SRC, CN, NDIS };
  instr_1 = { NMEM, R0, `SH 0, ZEROS, R0, IN2, CN, NDIS };
  instr_2 = { NMEM, RE, sh1_4th, SRC, RE, SRC, CN, NDIS };
  instr_3 = { NMEM, RE, sh3_4th, SRC, RE, SRC, CN, NDIS };
  clockem;

  // bookkeeping for the shift-add loop and its overflow detection
  init = 1;
  init_add_0 = 0; init_add_2 = 0; init_add_3 = 0;
  op_ov_0 = 0; op_ov_1 = 0; op_ov_2 = 0; op_ov_3 = 0;
  overflow = 0;

  // the loop body runs WORD_WIDTH/4 times (a quarter of the word length)
  repeat (WORD_WIDTH/4)
  begin
    // all: out -> in_2, west = west rotated right, clear out
    instr_0 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    instr_1 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    instr_2 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    instr_3 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
    clockem;

    // 0 & 1: if op bit = 1, compute carry for in_1 + in_2, south & out
    //        = in_1 + in_2, check overflow; otherwise, south & out = in_1
    // 2 & 3: if op bit = 1, compute carry for in_1 + in_2, north & out
    //        = in_1 + in_2, check overflow; otherwise, north & out = in_1
    if (flags_0[REG_MSB_F]) condition_0 = 1; else condition_0 = 0;
    if (flags_1[REG_MSB_F]) condition_1 = 1; else condition_1 = 0;
    if (flags_2[REG_MSB_F]) condition_2 = 1; else condition_2 = 0;
    if (flags_3[REG_MSB_F]) condition_3 = 1; else condition_3 = 0;

    if (condition_0) instr_0 = { NMEM, NALU, C0, NDIS };
    else             instr_0 = { NOOP };
    if (condition_1) instr_1 = { NMEM, NALU, C0, NDIS };
    else             instr_1 = { NOOP };
    if (condition_2) instr_2 = { NMEM, NALU, C0, NDIS };
    else             instr_2 = { NOOP };
    if (condition_3) instr_3 = { NMEM, NALU, C0, NDIS };
    else             instr_3 = { NOOP };
    clockem;

    if (condition_0) instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
    else             instr_0 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
    if (condition_1) instr_1 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
    else             instr_1 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
    if (condition_2) instr_2 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
    else             instr_2 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
    if (condition_3) instr_3 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
    else             instr_3 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
    clockem;

    // an add overflows if it produced a carry or if a 1 was already
    // shifted out of that PE's east register (op_ov_N)
    if (   condition_0 && (flags_0[CARRY_F] || op_ov_0)
        || condition_1 && (flags_1[CARRY_F] || op_ov_1)
        || condition_2 && (flags_2[CARRY_F] || op_ov_2)
        || condition_3 && (flags_3[CARRY_F] || op_ov_3) )
      overflow = 1;

    // remember which PEs added on the very first iteration
    if (init)
    begin
      if (condition_0) init_add_0 = 1;
      if (condition_2) init_add_2 = 1;
      if (condition_3) init_add_3 = 1;
    end
    init = 0;

    // check for upcoming overflow possibility
    // all: out -> in_1, east & out = east shifted left with zero
    if (e_out_0) op_ov_0 = 1;
    if (e_out_1) op_ov_1 = 1;
    if (e_out_2) op_ov_2 = 1;
    if (e_out_3) op_ov_3 = 1;
    last_e_out_0 = e_out_0;
    last_e_out_1 = e_out_1;
    last_e_out_2 = e_out_2;
    instr_0 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    instr_1 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    instr_2 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    instr_3 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
    clockem;
  end

  // check for overflow from first iteration
  // 1: out = north
  // 2: out = south
  // 0 & 3: do nothing
  if (   init_add_0 && last_e_out_2
      || init_add_2 && last_e_out_1
      || init_add_3 && last_e_out_0 )
    overflow = 1;
  // instr_0 and instr_3 stay NOOP for every remaining step
  instr_0 = { NOOP };
  instr_1 = { NMEM, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  instr_2 = { NMEM, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  instr_3 = { NOOP };
  clockem;

  // 1 & 2: out -> in_2, clear out
  // 0 & 3: do nothing
  instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  instr_2 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  clockem;

  // 1 & 2: compute carry for in_1 + in_2
  // 0 & 3: do nothing
  instr_1 = { NMEM, NALU, C0, NDIS };
  instr_2 = { NMEM, NALU, C0, NDIS };
  clockem;

  // 1: south & out = in_1 + in_2
  // 2: north & out = in_1 + in_2
  // 0 & 3: do nothing
  // check overflow
  instr_1 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
  instr_2 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
  clockem;
  if ( flags_1[CARRY_F] || flags_2[CARRY_F] ) overflow = 1;

  // 1: out -> in_1, out = south
  // 2: out -> in_1, out = north
  // 0 & 3: do nothing
  instr_1 = { `M 0, OUT, F, F, T, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  instr_2 = { `M 0, OUT, F, F, T, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
  clockem;

  // 1 & 2: out -> in_2, clear out
  // 0 & 3: do nothing
  instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  instr_2 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  clockem;

  // 1 & 2: compute carry for in_1 + in_2
  // 0 & 3: do nothing
  instr_1 = { NMEM, NALU, C0, NDIS };
  instr_2 = { NMEM, NALU, C0, NDIS };
  clockem;

  // 1 & 2: out = in_1 + in_2, clear in_2
  // 0 & 3: do nothing
  // check overflow
  instr_1 = { `M 0, OUT, F, T, F, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
  instr_2 = { `M 0, OUT, F, T, F, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
  clockem;
  if ( flags_1[CARRY_F] || flags_2[CARRY_F] ) overflow = 1;

  // Both branches below issue six clock cycles, so the total cycle count
  // does not depend on the sign of the result.
  if (negative)
  begin // negate and save result
    // 1 & 2: out -> in_1
    // 0 & 3: do nothing
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_2 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;

    // 1 & 2: out = ~in_1
    // 0 & 3: do nothing
    instr_1 = { NMEM, R0, `SH 0, SRC, R0, NIN1, CN, NDIS };
    instr_2 = { NMEM, R0, `SH 0, SRC, R0, NIN1, CN, NDIS };
    clockem;

    // 1 & 2: out -> in_1
    // 0 & 3: do nothing
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_2 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;

    // 1 & 2: compute carry for in_1 + 1
    // 0 & 3: do nothing
    instr_1 = { NMEM, NALU, C1, NDIS };
    instr_2 = { NMEM, NALU, C1, NDIS };
    clockem;

    // 1 & 2: out = in_1 + c
    // 0 & 3: do nothing
    instr_1 = { NMEM, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
    instr_2 = { NMEM, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
    clockem;

    // 1 & 2: out -> mem[2]
    // 0 & 3: do nothing
    instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    instr_2 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    clockem;
  end
  else // positive
  begin // save result
    // 1 & 2: out -> mem[2]
    // 0 & 3: do nothing
    instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    instr_2 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
    clockem;

    // all: do nothing 5 times
    instr_0 = { NOOP };
    instr_1 = { NOOP };
    instr_2 = { NOOP };
    instr_3 = { NOOP };
    clockem;
    clockem;
    clockem;
    clockem;
    clockem;
  end

  // dump every PE, then report the operands/result held by PE_1 and PE_2
  dump;
  $display("\nPE_1 unsigned: %0d x %0d = %0d, overflow = %0d",
           pe_1.mem[0], pe_1.mem[1], pe_1.mem[2], overflow);
  $display("PE_2 unsigned: %0d x %0d = %0d, overflow = %0d",
           pe_2.mem[0], pe_2.mem[1], pe_2.mem[2], overflow);
  $display("%0d clock cycles", clock_ct);
end

// Pulse the shared reset line: high after NS, low again after another NS.
task reset_regs;
  begin
    #NS reset = 1;
    #NS reset = 0;
  end
endtask

// Issue one clock pulse to all PEs (high after NS, low after another NS)
// and count it in clock_ct.
task clockem;
  begin
    #NS clock = 1;
    #NS clock = 0;
    clock_ct = clock_ct + 1;
  end
endtask

// Pulse the register-dump and then the memory-dump line of each PE in
// turn (PE_0 .. PE_3), writing a form feed between consecutive PEs.
task dump;
  begin
    #NS dump_r_0 = 1;
    #NS dump_r_0 = 0;
    #NS dump_m_0 = 1;
    #NS dump_m_0 = 0;
    form_feed;
    #NS dump_r_1 = 1;
    #NS dump_r_1 = 0;
    #NS dump_m_1 = 1;
    #NS dump_m_1 = 0;
    form_feed;
    #NS dump_r_2 = 1;
    #NS dump_r_2 = 0;
    #NS dump_m_2 = 1;
    #NS dump_m_2 = 0;
    form_feed;
    #NS dump_r_3 = 1;
    #NS dump_r_3 = 0;
    #NS dump_m_3 = 1;
    #NS dump_m_3 = 0;
  end
endtask

// Write an ASCII form feed (octal 14) to the output, without a newline.
task form_feed;
  $write("\14");
endtask

endmodule

Host command: verilog Command arguments: pe.v pc06-2.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:13:46 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc.
* Compiling source file "pe.v" Compiling source file "pc06-2.v" Highest level modules: pe0 pe pe3 pe4 pc6 2's-complement Integer 1x1=1-word Multiply with 2 PE's PE "PE_3" Reset, Clock Cycle # 0 PE "PE_2" Reset, Clock Cycle # 0 PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 55 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000010000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000001111111111111111100000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 55 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_1" Port and Register Dump # 0, Clock Cycle # 55 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i 
mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 0 disab carry (r) 0 0 IN1: 00000000111111110000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000010000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000111111110000000000000000 (r) S_IN: 11111111000000000000000000000000 (p) EAST: 00000001000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11111111000000000000000111111111 (r) PE "PE_1" Memory Dump # 0, Clock Clycle # 55 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_2" Port and Register Dump # 0, Clock Cycle # 55 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 1 0 disab carry (r) 0 0 IN1: 11111111000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 
00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 11111111000000000000000000000000 (r) N_IN: 00000000111111110000000000000000 (p) SOUTH: 00000000000000011111111111111111 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11111111111111110000000000000001 (r) PE "PE_2" Memory Dump # 0, Clock Clycle # 55 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_3" Port and Register Dump # 0, Clock Cycle # 55 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000011111111111111111 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 
00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 00000000111111111111111111111111 (r) PE "PE_3" Memory Dump # 0, Clock Clycle # 55 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE_1 unsigned: 131071 x 65536 = 4294901760, overflow = 1 PE_2 unsigned: 131071 x 65536 = 4294901760, overflow = 1 55 clock cycles 42 warnings 29135 simulation events CPU time: 1.5 secs to compile + 0.7 secs to link + 4.8 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:13:53 module pc7; // pc07-2.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 2's-complement integer shift-add 1x1=1 word multiply with 8 PE's // (using variable-shift EAST & WEST registers) // (highest level module; requires module pe2 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define ADDR_WIDTH 4 // width of PE memory address, in bits parameter WORD_WIDTH = 32, // width of PE word, in bits MEM_LENGTH = 16, // length of PE memory, in words MHZ = 16.7, // clock speed, in "megahertz" NS = 500/MHZ, // clock half-period, in "nanoseconds" ADDR_WIDTH = `ADDR_WIDTH; // Had to use a "`define" above to // circumnavigate a Verilog-XL bug that // prevents parameters from working as // bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, dump_r_1, dump_m_1, dump_r_2, dump_m_2, dump_r_3, dump_m_3, dump_r_4, dump_m_4, dump_r_5, dump_m_5, dump_r_6, dump_m_6, dump_r_7, dump_m_7, condition_0, condition_1, condition_2, condition_3, condition_4, condition_5, condition_6, condition_7, negative, init, init_add_0, init_add_2, init_add_3, init_add_4, init_add_5, init_add_6, init_add_7, last_e_out_0, last_e_out_1, last_e_out_2, last_e_out_3, last_e_out_4, last_e_out_5, last_e_out_6, op_ov_0, op_ov_1, op_ov_2, op_ov_3, op_ov_4, op_ov_5, op_ov_6, op_ov_7, overflow; reg [5:0] sh1_8th, sh2_8th, sh3_8th, sh4_8th, sh5_8th, sh6_8th, sh7_8th; reg [ADDR_WIDTH+40-1:0] instr_0, instr_1, instr_2, instr_3, instr_4, instr_5, instr_6, instr_7; wire e_out_0, w_0, e_out_1, w_1, e_out_2, w_2, e_out_3, w_3, e_out_4, w_4, e_out_5, w_5, e_out_6, w_6, e_out_7, w_7; wire [9:0] flags_0, flags_1, flags_2, flags_3, flags_4, flags_5, flags_6, flags_7; wire [WORD_WIDTH-1:0] n1_s0, s0_n1, n2_s1, s1_n2, n3_s2, s2_n3, n4_s3, s3_n4, n5_s4, s4_n5, n6_s5, s5_n6, n7_s6, s6_n7; integer clock_ct; // PE instances and connections: pe2 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, 0, w_0, 0, , s0_n1, e_out_0, w_0, , 0,,,,, dump_r_0, dump_m_0); pe2 pe_1 (clock, reset, instr_1, flags_1, s0_n1, n2_s1, 0, w_1, 0, n1_s0, s1_n2, e_out_1, w_1, , 0,,,,, dump_r_1, dump_m_1); pe2 pe_2 (clock, reset, instr_2, flags_2, s1_n2, n3_s2, 0, w_2, 0, n2_s1, s2_n3, e_out_2, w_2, , 0,,,,, dump_r_2, dump_m_2); pe2 pe_3 (clock, reset, instr_3, flags_3, s2_n3, n4_s3, 0, w_3, 0, n3_s2, s3_n4, e_out_3, w_3, , 0,,,,, dump_r_3, dump_m_3); pe2 pe_4 (clock, reset, instr_4, flags_4, s3_n4, n5_s4, 0, w_4, 0, n4_s3, s4_n5, e_out_4, w_4, , 0,,,,, dump_r_4, dump_m_4); pe2 pe_5 (clock, reset, instr_5, flags_5, s4_n5, n6_s5, 0, w_5, 0, n5_s4, s5_n6, e_out_5, w_5, , 0,,,,, dump_r_5, dump_m_5); pe2 pe_6 (clock, reset, instr_6, flags_6, s5_n6, n7_s6, 0, w_6, 0, n6_s5, s6_n7, e_out_6, w_6, , 0,,,,, dump_r_6, dump_m_6); pe2 
pe_7 (clock, reset, instr_7, flags_7, s6_n7, 0, 0, w_7, 0, n7_s6, , e_out_7, w_7, , 0,,,,, dump_r_7, dump_m_7); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = ADDR_WIDTH, pe_0.WORD_WIDTH = WORD_WIDTH, pe_0.MEM_LENGTH = MEM_LENGTH, pe_0.PE_NAME = "PE_0", pe_1.ADDR_WIDTH = ADDR_WIDTH, pe_1.WORD_WIDTH = WORD_WIDTH, pe_1.MEM_LENGTH = MEM_LENGTH, pe_1.PE_NAME = "PE_1", pe_2.ADDR_WIDTH = ADDR_WIDTH, pe_2.WORD_WIDTH = WORD_WIDTH, pe_2.MEM_LENGTH = MEM_LENGTH, pe_2.PE_NAME = "PE_2", pe_3.ADDR_WIDTH = ADDR_WIDTH, pe_3.WORD_WIDTH = WORD_WIDTH, pe_3.MEM_LENGTH = MEM_LENGTH, pe_3.PE_NAME = "PE_3", pe_4.ADDR_WIDTH = ADDR_WIDTH, pe_4.WORD_WIDTH = WORD_WIDTH, pe_4.MEM_LENGTH = MEM_LENGTH, pe_4.PE_NAME = "PE_4", pe_5.ADDR_WIDTH = ADDR_WIDTH, pe_5.WORD_WIDTH = WORD_WIDTH, pe_5.MEM_LENGTH = MEM_LENGTH, pe_5.PE_NAME = "PE_5", pe_6.ADDR_WIDTH = ADDR_WIDTH, pe_6.WORD_WIDTH = WORD_WIDTH, pe_6.MEM_LENGTH = MEM_LENGTH, pe_6.PE_NAME = "PE_6", pe_7.ADDR_WIDTH = ADDR_WIDTH, pe_7.WORD_WIDTH = WORD_WIDTH, pe_7.MEM_LENGTH = MEM_LENGTH, pe_7.PE_NAME = "PE_7"; // PE instruction fields and bit widths: // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 src_reg shift ... // (ADDR_WIDTH) 1 1 1 1 4 6 // reg_res dest_reg out_res carry alu_dis alu_dis_s mb_dis mb_dis_s // 8 4 8 2 1 1 1 1 parameter // PE instruction field values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits // reg_res & out_res: ALU operands for "OUT" and destination register results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, // carry: // Note: The PE only does EITHER a carry operation OR an ALU operation in // one instruction cycle. A carry word is computed using registers IN_1 // and IN_2 and the specified carry-in, and is made available at the input // from the router, r_in. The carry-out is placed in the carry flag of the // p_flags register. 
CN = 2'b00, // do not compute carry (do an ALU operation) CC = 2'b01, // compute carry using carry flag for carry-in C0 = 2'b10, // compute carry using 0 for carry-in C1 = 2'b11; // compute carry using 1 for carry-in // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F // convenience parameters and definitions for writing PE instructions: `define M `ADDR_WIDTH'h // when specifying memory address (in hexidecimal) parameter NMEM = { `M 0, MEM, F, F, F }; // when not using memory bus `define SH 6'd // when specifying shift (in decimal) parameter NALU = { R0, `SH 0, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, CN, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) parameter // bit positions of PE flags // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN_2 IN2_ZER_F = 4, // if register IN_2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN_1 IN1_ZER_F = 2, // if register IN_1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // result of last carry operation // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. 
// Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!

// Test program.  Loads the same two operands into mem[0] and mem[1] of every
// PE, pulses reset, then builds instr_0..instr_7 and pulses the clock once per
// step (task clockem).  At the end it dumps all eight PEs and prints mem[0],
// mem[1], mem[2], the overflow indication and the clock count for pe_3 and
// pe_4.  The "N: ..." comments name the PEs an instruction step applies to.
initial
  begin
    form_feed;
    // NOTE(review): the banner says "2 PE's" but this program drives
    // pe_0..pe_7 -- confirm the intended wording before changing the string.
    $display("2's-complement Integer 1x1=1-word Multiply with 2 PE's");
    clock = 0; clock_ct = 0;
    dump_r_0 = 0; dump_m_0 = 0;
    dump_r_1 = 0; dump_m_1 = 0;
    dump_r_2 = 0; dump_m_2 = 0;
    dump_r_3 = 0; dump_m_3 = 0;
    dump_r_4 = 0; dump_m_4 = 0;
    dump_r_5 = 0; dump_m_5 = 0;
    dump_r_6 = 0; dump_m_6 = 0;
    dump_r_7 = 0; dump_m_7 = 0;
    // operands: set once in pe_0, then copied to every other PE
    pe_0.mem[0] = 32'h1ffff;
    pe_0.mem[1] = 32'h10000;
    pe_1.mem[0] = pe_0.mem[0]; pe_1.mem[1] = pe_0.mem[1];
    pe_2.mem[0] = pe_0.mem[0]; pe_2.mem[1] = pe_0.mem[1];
    pe_3.mem[0] = pe_0.mem[0]; pe_3.mem[1] = pe_0.mem[1];
    pe_4.mem[0] = pe_0.mem[0]; pe_4.mem[1] = pe_0.mem[1];
    pe_5.mem[0] = pe_0.mem[0]; pe_5.mem[1] = pe_0.mem[1];
    pe_6.mem[0] = pe_0.mem[0]; pe_6.mem[1] = pe_0.mem[1];
    pe_7.mem[0] = pe_0.mem[0]; pe_7.mem[1] = pe_0.mem[1];

    // all: clear all registers
    reset_regs;

    // 0,2,4,6: mem[0] -> in_2
    // 1,3,5,7: mem[1] -> in_2
    instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
    instr_1 = { `M 1, MEM, F, T, F, NALU, CN, NDIS };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;

    // all: if in_2 negative, then out = ~in_2, in_2 = out, compute carry for
    //      in_2 + 1; otherwise in_2 positive, so compute carry for in_2 + 0
    // (only the flags of PEs 0 and 1 are sampled here; PEs 2-7 are given
    // copies of instr_0 / instr_1)
    if (flags_0[IN2_MSB_F]) condition_0 = 1; else condition_0 = 0;
    if (flags_1[IN2_MSB_F]) condition_1 = 1; else condition_1 = 0;
    // result is negated at the end when exactly one operand is negative
    if (condition_0^condition_1) negative = 1; else negative = 0;
    if (condition_0) instr_0 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
    else instr_0 = { NOOP };
    if (condition_1) instr_1 = { NMEM, R0, `SH 0, SRC, R0, NIN2, CN, NDIS };
    else instr_1 = { NOOP };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;
    if (condition_0) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    else instr_0 = { NOOP };
    if (condition_1) instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    else instr_1 = { NOOP };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;
    if (condition_0) instr_0 = { NMEM, NALU, C1, NDIS };
    else instr_0 = { NMEM, NALU, C0, NDIS };
    if (condition_1) instr_1 = { NMEM, NALU, C1, NDIS };
    else instr_1 = { NMEM, NALU, C0, NDIS };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;

    // 0,2,4,6: south & out = in_2 + c
    // 1,3,5,7: north & out = in_2 + c
    instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
    instr_1 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;

    // 0,2,4,6: out -> in_2, east = south
    // 1,3,5,7: out -> in_2, west = north
    instr_0 = { `M 0, OUT, F, T, F, RS, `SH 0, SRC, RE, ZEROS, CN, NDIS };
    instr_1 = { `M 0, OUT, F, T, F, RN, `SH 0, SRC, RW, ZEROS, CN, NDIS };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;

    // 0,2,4,6: west = in_2
    // 1,3,5,7: east = in_2
    instr_0 = { NMEM, R0, `SH 0, IN2, RW, ZEROS, CN, NDIS };
    instr_1 = { NMEM, R0, `SH 0, IN2, RE, ZEROS, CN, NDIS };
    instr_2 = instr_0; instr_3 = instr_1; instr_4 = instr_0;
    instr_5 = instr_1; instr_6 = instr_0; instr_7 = instr_1;
    clockem;

    // 0: west = west shifted right 1/2 word
    // 1: do nothing
    // 2: west = west shifted right 1/4 word
    // 3: west = west shifted right 3/4 word
    // 4: west = west shifted right 1/8 word
    // 5: west = west shifted right 3/8 word
    // 6: west = west shifted right 5/8 word
    // 7: west = west shifted right 7/8 word
    // (shN_8th holds N eighths of WORD_WIDTH, used as the shift field)
    sh1_8th = WORD_WIDTH/8;
    sh2_8th = WORD_WIDTH/4;
    sh3_8th = 3*WORD_WIDTH/8;
    sh4_8th = WORD_WIDTH/2;
    sh5_8th = 5*WORD_WIDTH/8;
    sh6_8th = 3*WORD_WIDTH/4;
    sh7_8th = 7*WORD_WIDTH/8;
    // Note must use registers because a "parameter" cannot be created that
    // is a sized value of an expression, and a "`define" cannot be created
    // that is a value of an expression!!!
    instr_0 = { NMEM, RW, sh4_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_1 = { NOOP };
    instr_2 = { NMEM, RW, sh2_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_3 = { NMEM, RW, sh6_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_4 = { NMEM, RW, sh1_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_5 = { NMEM, RW, sh3_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_6 = { NMEM, RW, sh5_8th, SRC, RW, ZEROS, CN, NDIS };
    instr_7 = { NMEM, RW, sh7_8th, SRC, RW, ZEROS, CN, NDIS };
    clockem;

    // 0: east & out = east shifted left 1/2 word with 0's
    // 1: out = in_2
    // 2: east & out = east shifted left 1/4 word with 0's
    // 3: east & out = east shifted left 3/4 word with 0's
    // 4: east & out = east shifted left 1/8 word with 0's
    // 5: east & out = east shifted left 3/8 word with 0's
    // 6: east & out = east shifted left 5/8 word with 0's
    // 7: east & out = east shifted left 7/8 word with 0's
    instr_0 = { NMEM, RE, sh4_8th, SRC, RE, SRC, CN, NDIS };
    instr_1 = { NMEM, R0, `SH 0, ZEROS, R0, IN2, CN, NDIS };
    instr_2 = { NMEM, RE, sh2_8th, SRC, RE, SRC, CN, NDIS };
    instr_3 = { NMEM, RE, sh6_8th, SRC, RE, SRC, CN, NDIS };
    instr_4 = { NMEM, RE, sh1_8th, SRC, RE, SRC, CN, NDIS };
    instr_5 = { NMEM, RE, sh3_8th, SRC, RE, SRC, CN, NDIS };
    instr_6 = { NMEM, RE, sh5_8th, SRC, RE, SRC, CN, NDIS };
    instr_7 = { NMEM, RE, sh7_8th, SRC, RE, SRC, CN, NDIS };
    clockem;

    // bookkeeping kept by this program (not by the PEs):
    //   init        - true only during the first loop pass
    //   init_add_N  - PE N added during the first pass (there is no init_add_1)
    //   op_ov_N     - sticky: e_out_N has been seen set
    //   overflow    - sticky overall overflow indication
    init = 1;
    init_add_0 = 0; init_add_2 = 0; init_add_3 = 0; init_add_4 = 0;
    init_add_5 = 0; init_add_6 = 0; init_add_7 = 0;
    op_ov_0 = 0; op_ov_1 = 0; op_ov_2 = 0; op_ov_3 = 0;
    op_ov_4 = 0; op_ov_5 = 0; op_ov_6 = 0; op_ov_7 = 0;
    overflow = 0;
    // WORD_WIDTH/8 passes, four clockem calls per pass
    repeat (WORD_WIDTH/8)
      begin
        // all: out -> in_2, west = west rotated right, clear out
        instr_0 = { `M 0, OUT, F, T, F, RW, `SH 1, SRC, RW, ZEROS, CN, NDIS };
        instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0;
        instr_4 = instr_0; instr_5 = instr_0; instr_6 = instr_0;
        instr_7 = instr_0;
        clockem;

        // 0,2,4,6: if op bit = 1, compute carry for in_1 + in_2, south & out
        //          = in_1 + in_2, check overflow; otherwise, south & out = in_1
        // 1,3,5,7: if op bit = 1, compute carry for in_1 + in_2, north & out
        //          = in_1 + in_2, check overflow; otherwise, north & out = in_1
        // (the op bit is read from each PE's REG_MSB_F flag after the rotate)
        if (flags_0[REG_MSB_F]) condition_0 = 1; else condition_0 = 0;
        if (flags_1[REG_MSB_F]) condition_1 = 1; else condition_1 = 0;
        if (flags_2[REG_MSB_F]) condition_2 = 1; else condition_2 = 0;
        if (flags_3[REG_MSB_F]) condition_3 = 1; else condition_3 = 0;
        if (flags_4[REG_MSB_F]) condition_4 = 1; else condition_4 = 0;
        if (flags_5[REG_MSB_F]) condition_5 = 1; else condition_5 = 0;
        if (flags_6[REG_MSB_F]) condition_6 = 1; else condition_6 = 0;
        if (flags_7[REG_MSB_F]) condition_7 = 1; else condition_7 = 0;
        if (condition_0) instr_0 = { NMEM, NALU, C0, NDIS };
        else instr_0 = { NOOP };
        if (condition_1) instr_1 = { NMEM, NALU, C0, NDIS };
        else instr_1 = { NOOP };
        if (condition_2) instr_2 = { NMEM, NALU, C0, NDIS };
        else instr_2 = { NOOP };
        if (condition_3) instr_3 = { NMEM, NALU, C0, NDIS };
        else instr_3 = { NOOP };
        if (condition_4) instr_4 = { NMEM, NALU, C0, NDIS };
        else instr_4 = { NOOP };
        if (condition_5) instr_5 = { NMEM, NALU, C0, NDIS };
        else instr_5 = { NOOP };
        if (condition_6) instr_6 = { NMEM, NALU, C0, NDIS };
        else instr_6 = { NOOP };
        if (condition_7) instr_7 = { NMEM, NALU, C0, NDIS };
        else instr_7 = { NOOP };
        clockem;
        if (condition_0) instr_0 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
        else instr_0 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
        if (condition_1) instr_1 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
        else instr_1 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
        if (condition_2) instr_2 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
        else instr_2 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
        if (condition_3) instr_3 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
        else instr_3 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
        if (condition_4) instr_4 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
        else instr_4 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
        if (condition_5) instr_5 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
        else instr_5 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
        if (condition_6) instr_6 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
        else instr_6 = { NMEM, RR, `SH 0, IN1, RS, IN1, CN, NDIS };
        if (condition_7) instr_7 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
        else instr_7 = { NMEM, RR, `SH 0, IN1, RN, IN1, CN, NDIS };
        clockem;
        // an add that carried out, or an add made after e_out was seen set,
        // flags overflow
        if ( condition_0 && (flags_0[CARRY_F] || op_ov_0) ||
             condition_1 && (flags_1[CARRY_F] || op_ov_1) ||
             condition_2 && (flags_2[CARRY_F] || op_ov_2) ||
             condition_3 && (flags_3[CARRY_F] || op_ov_3) ||
             condition_4 && (flags_4[CARRY_F] || op_ov_4) ||
             condition_5 && (flags_5[CARRY_F] || op_ov_5) ||
             condition_6 && (flags_6[CARRY_F] || op_ov_6) ||
             condition_7 && (flags_7[CARRY_F] || op_ov_7) )
          overflow = 1;
        if (init)
          begin // first pass only: remember which PEs added
            init = 0;
            if (condition_0) init_add_0 = 1;
            if (condition_2) init_add_2 = 1;
            if (condition_3) init_add_3 = 1;
            if (condition_4) init_add_4 = 1;
            if (condition_5) init_add_5 = 1;
            if (condition_6) init_add_6 = 1;
            if (condition_7) init_add_7 = 1;
          end

        // check for upcoming overflow possibility
        // all: out -> in_1, east & out = east shifted left with zero
        if (e_out_0) op_ov_0 = 1;
        if (e_out_1) op_ov_1 = 1;
        if (e_out_2) op_ov_2 = 1;
        if (e_out_3) op_ov_3 = 1;
        if (e_out_4) op_ov_4 = 1;
        if (e_out_5) op_ov_5 = 1;
        if (e_out_6) op_ov_6 = 1;
        if (e_out_7) op_ov_7 = 1;
        // e_out sampled before the shift below; e_out_7 is not recorded and
        // is not referenced by the check after the loop
        last_e_out_0 = e_out_0;
        last_e_out_1 = e_out_1;
        last_e_out_2 = e_out_2;
        last_e_out_3 = e_out_3;
        last_e_out_4 = e_out_4;
        last_e_out_5 = e_out_5;
        last_e_out_6 = e_out_6;
        instr_0 = { `M 0, OUT, F, F, T, RE, `SH 1, SRC, RE, SRC, CN, NDIS };
        instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0;
        instr_4 = instr_0; instr_5 = instr_0; instr_6 = instr_0;
        instr_7 = instr_0;
        clockem;
      end

    // check for overflow from first iteration
    // 1,5: out = north
    // 2,6: out = south
    // 0,3,4,7: do nothing
    if ( init_add_0 && last_e_out_5 ||
         init_add_2 && last_e_out_4 ||
         init_add_3 && last_e_out_6 ||
         init_add_4 && last_e_out_1 ||
         init_add_5 && last_e_out_2 ||
         init_add_6 && last_e_out_0 ||
         init_add_7 && last_e_out_3 )
      overflow = 1;
    instr_0 = { NOOP };
    instr_1 = { NMEM, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    instr_2 = { NMEM, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    instr_3 = instr_0; instr_4 = instr_0; instr_5 = instr_1;
    instr_6 = instr_2; instr_7 = instr_0;
    clockem;

    // 1,2,5,6: out -> in_2
    // 0,3,4,7: do nothing
    instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    instr_2 = instr_1; instr_5 = instr_1; instr_6 = instr_1;
    clockem;

    // 1,2,5,6: compute carry for in_1 + in_2
    // 0,3,4,7: do nothing
    instr_1 = { NMEM, NALU, C0, NDIS };
    instr_2 = instr_1; instr_5 = instr_1; instr_6 = instr_1;
    clockem;

    // 1,5: south & out = in_1 + in_2
    // 2,6: north & out = in_1 + in_2
    // 0,3,4,7: do nothing
    // check overflow
    instr_1 = { NMEM, RR, `SH 0, SUM12S, RS, SUM12S, CN, NDIS };
    instr_2 = { NMEM, RR, `SH 0, SUM12S, RN, SUM12S, CN, NDIS };
    instr_5 = instr_1; instr_6 = instr_2;
    clockem;
    if ( flags_1[CARRY_F] || flags_2[CARRY_F] ||
         flags_5[CARRY_F] || flags_6[CARRY_F] )
      overflow = 1;

    // 2: out -> in_1, out = north
    // 5: out -> in_1, out = south
    // 0,1,3,4,6,7: do nothing
    instr_1 = { NOOP };
    instr_2 = { `M 0, OUT, F, F, T, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    instr_5 = { `M 0, OUT, F, F, T, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    instr_6 = instr_1;
    clockem;

    // 2,5: out -> in_2
    // 0,1,3,4,6,7: do nothing
    instr_2 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    instr_5 = instr_2;
    clockem;

    // 2,5: compute carry for in_1 + in_2
    // 0,1,3,4,6,7: do nothing
    instr_2 = { NMEM, NALU, C0, NDIS };
    instr_5 = instr_2;
    clockem;

    // 2: south = in_1 + in_2
    // 5: north = in_1 + in_2
    // 0,1,3,4,6,7: do nothing
    // check overflow
    instr_2 = { NMEM, RR, `SH 0, SUM12S, RS, ZEROS, CN, NDIS };
    instr_5 = { NMEM, RR, `SH 0, SUM12S, RN, ZEROS, CN, NDIS };
    clockem;
    if ( flags_2[CARRY_F] || flags_5[CARRY_F] )
      overflow = 1;

    // 3: south & out = north
    // 4: north & out = south
    // 0,1,2,5,6,7: do nothing
    instr_2 = { NOOP };
    instr_3 = { NMEM, RN, `SH 0, SRC, RS, SRC, CN, NDIS };
    instr_4 = { NMEM, RS, `SH 0, SRC, RN, SRC, CN, NDIS };
    instr_5 = instr_2;
    clockem;

    // 3: out -> in_1, out = south
    // 4: out -> in_1, out = north
    // 0,1,2,5,6,7: do nothing
    instr_3 = { `M 0, OUT, F, F, T, RS, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    instr_4 = { `M 0, OUT, F, F, T, RN, `SH 0, ZEROS, R0, SRC, CN, NDIS };
    clockem;

    // 3,4: out -> in_2, clear out
    // 0,1,2,5,6,7: do nothing
    instr_3 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    instr_4 = instr_3;
    clockem;

    // 3,4: compute carry for in_1 + in_2
    // 0,1,2,5,6,7: do nothing
    instr_3 = { NMEM, NALU, C0, NDIS };
    instr_4 = instr_3;
    clockem;

    // 3,4: out = in_1 + in_2, clear in_2
    // 0,1,2,5,6,7: do nothing
    // check overflow
    instr_3 = { `M 0, OUT, F, T, F, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
    instr_4 = instr_3;
    clockem;
    if ( flags_3[CARRY_F] || flags_4[CARRY_F] )
      overflow = 1;

    if (negative)
      begin // negate and save result
        // 3,4: out -> in_1
        // 0,1,2,5,6,7: do nothing
        instr_3 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
        instr_4 = instr_3;
        clockem;

        // 3,4: out = ~in_1
        // 0,1,2,5,6,7: do nothing
        instr_3 = { NMEM, R0, `SH 0, SRC, R0, NIN1, CN, NDIS };
        instr_4 = instr_3;
        clockem;

        // 3,4: out -> in_1
        // 0,1,2,5,6,7: do nothing
        instr_3 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
        instr_4 = instr_3;
        clockem;

        // 3,4: compute carry for in_1 + 1
        // 0,1,2,5,6,7: do nothing
        instr_3 = { NMEM, NALU, C1, NDIS };
        instr_4 = instr_3;
        clockem;

        // 3,4: out = in_1 + c
        // 0,1,2,5,6,7: do nothing
        instr_3 = { NMEM, RR, `SH 0, ZEROS, R0, SUM12S, CN, NDIS };
        instr_4 = instr_3;
        clockem;

        // 3,4: out -> mem[2]
        // 0,1,2,5,6,7: do nothing
        instr_3 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
        instr_4 = instr_3;
        clockem;
      end
    else // positive
      begin // save result
        // 3,4: out -> mem[2]
        // 0,1,2,5,6,7: do nothing
        instr_3 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
        instr_4 = instr_3;
        clockem;

        // all: do nothing 5 times
        // (one store + five idle clocks = six clockem calls, the same number
        // the negative branch issues)
        instr_3 = { NOOP };
        instr_4 = instr_3;
        clockem;
        clockem;
        clockem;
        clockem;
        clockem;
      end

    dump;
    $display("\nPE_3 unsigned: %0d x %0d = %0d, overflow = %0d",
             pe_3.mem[0], pe_3.mem[1], pe_3.mem[2], overflow);
    $display("PE_4 unsigned: %0d x %0d = %0d, overflow = %0d",
             pe_4.mem[0], pe_4.mem[1], pe_4.mem[2], overflow);
    $display("%0d clock cycles", clock_ct);
  end

// Pulse the shared reset line: high after NS, low again after another NS.
task reset_regs;
  begin
    #NS reset = 1;
    #NS reset = 0;
  end
endtask

// Pulse the shared clock line once (NS high, NS low) and count the pulse
// in clock_ct.
task clockem;
  begin
    #NS clock = 1;
    #NS clock = 0;
    clock_ct = clock_ct + 1;
  end
endtask

// For each PE 0..7 in turn, pulse its register-dump line and then its
// memory-dump line; a form feed is written between PEs (none after PE 7).
task dump;
  begin
    #NS dump_r_0 = 1; #NS dump_r_0 = 0;
    #NS dump_m_0 = 1; #NS dump_m_0 = 0;
    form_feed;
    #NS dump_r_1 = 1; #NS dump_r_1 = 0;
    #NS dump_m_1 = 1; #NS dump_m_1 = 0;
    form_feed;
    #NS dump_r_2 = 1; #NS dump_r_2 = 0;
    #NS dump_m_2 = 1; #NS dump_m_2 = 0;
    form_feed;
    #NS dump_r_3 = 1; #NS dump_r_3 = 0;
    #NS dump_m_3 = 1; #NS dump_m_3 = 0;
    form_feed;
    #NS dump_r_4 = 1; #NS dump_r_4 = 0;
    #NS dump_m_4 = 1; #NS dump_m_4 = 0;
    form_feed;
    #NS dump_r_5 = 1; #NS dump_r_5 = 0;
    #NS dump_m_5 = 1; #NS dump_m_5 = 0;
    form_feed;
    #NS dump_r_6 = 1; #NS dump_r_6 = 0;
    #NS dump_m_6 = 1; #NS dump_m_6 = 0;
    form_feed;
    #NS dump_r_7 = 1; #NS dump_r_7 = 0;
    #NS dump_m_7 = 1; #NS dump_m_7 = 0;
  end
endtask

// Write a page break to the output ("\14" is an octal escape: ASCII form feed).
task form_feed;
  $write("\14");
endtask

endmodule

Host command: verilog Command arguments: pe.v pc07-2.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:13:59 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc.
* Compiling source file "pe.v" Compiling source file "pc07-2.v" Highest level modules: pe0 pe pe3 pe4 pc7 2's-complement Integer 1x1=1-word Multiply with 2 PE's PE "PE_7" Reset, Clock Cycle # 0 PE "PE_6" Reset, Clock Cycle # 0 PE "PE_5" Reset, Clock Cycle # 0 PE "PE_4" Reset, Clock Cycle # 0 PE "PE_3" Reset, Clock Cycle # 0 PE "PE_2" Reset, Clock Cycle # 0 PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000011110000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00011111111111111111000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_1" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 
0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 0 disab carry (r) 0 0 IN1: 00000000000011110000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000011110000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000011110000000000000000 (r) S_IN: 00001111000000000000000000000000 (p) EAST: 00000000000100000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11110000000000000001111111111111 (r) PE "PE_1" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_2" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 00001111000000000000000000000000 (r) IN2: 00000000000011110000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 
00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00001111000000000000000000000000 (r) N_IN: 00000000000011110000000000000000 (p) SOUTH: 00001111000011110000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00010000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11111111111100000000000000011111 (r) PE "PE_2" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_3" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 0 disab carry (r) 0 0 IN1: 00001111000011110000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 
(r) N_IN: 00001111000011110000000000000000 (p) SOUTH: 00001111000011110000000000000000 (r) S_IN: 11110000111100000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00001111111111111111111111110000 (r) PE "PE_3" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_4" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 1 0 disab carry (r) 0 0 IN1: 11110000111100000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 11110000111100000000000000000000 (r) N_IN: 00001111000011110000000000000000 (p) SOUTH: 00000000111100000000000000000000 (r) S_IN: 11110000111100000000000000000000 (p) EAST: 00000001000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11111111000000000000000111111111 (r) PE "PE_4" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 ffff0000 xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_5" Port and Register Dump # 0, Clock Cycle # 44 
PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 1 0 disab carry (r) 0 0 IN1: 11110000111100000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 11110000111100000000000000000000 (r) N_IN: 00000000111100000000000000000000 (p) SOUTH: 11110000111100000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 11111111111111110000000000000001 (r) PE "PE_5" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_6" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 
00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 11110000111100000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00001111111111111111111100000000 (r) PE "PE_6" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_7" Port and Register Dump # 0, Clock Cycle # 44 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg shift reg_res 1 0 0 0 0000 000000 10101010 dest_reg out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 0000 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 1 0 1 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 
(r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 00001111111111111111111111111111 (r) PE "PE_7" Memory Dump # 0, Clock Clycle # 44 0: 0001ffff 00010000 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE_3 unsigned: 131071 x 65536 = 4294901760, overflow = 1 PE_4 unsigned: 131071 x 65536 = 4294901760, overflow = 1 44 clock cycles 50 warnings 46372 simulation events CPU time: 1.6 secs to compile + 1.2 secs to link + 7.9 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:14:10
module pc8;
// pc08-.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 2's-complement integer sequential restoring divide with 1 PE
// (using fixed-(1-bit)-shift EAST & WEST registers)
// (highest level module; requires module pe in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Summary: this testbench preloads pe_0.mem[0] (dividend A) and pe_0.mem[1]
// (divisor B), drives one PE instruction per clock through task "do", and
// finally stores the quotient in pe_0.mem[2] and the remainder in
// pe_0.mem[3].  div_0 records the "IN_2 = 0" flag sampled when B is loaded.

// system parameters:
`define ADDR_WIDTH 4                // width of PE memory address, in bits
parameter
    WORD_WIDTH = 32,                // width of PE word, in bits
    MEM_LENGTH = 16,                // length of PE memory, in words
    MHZ = 16.7,                     // clock speed, in "megahertz"
    NS = 500/MHZ,                   // clock half-period, in "nanoseconds"
    ADDR_WIDTH = `ADDR_WIDTH;       // Had to use a "`define" above to
                                    // circumnavigate a Verilog-XL bug that
                                    // prevents parameters from working as
                                    // bit length specifiers!!!

// variable declarations:
reg clock, reset, dump_r_0, dump_m_0, neg_Q, neg_R, e_in, div_0;
reg [ADDR_WIDTH+34-1:0] instr_0;    // one PE instruction: address + 34 bits
wire [9:0] flags_0;                 // PE flags, indexed by the *_F parameters
integer clock_ct;                   // number of PE clock cycles issued

// PE instances and connections:
// (positional connections; an empty position leaves that port unconnected.
//  NOTE(review): port order is presumably that of module pe0 in pe.v --
//  confirm against module pe.)
pe pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in, 0, 0, , , , , ,
         0,,,,, dump_r_0, dump_m_0);
defparam // set PE-instance parameters to system parameters:
    pe_0.ADDR_WIDTH = ADDR_WIDTH,
    pe_0.WORD_WIDTH = WORD_WIDTH,
    pe_0.MEM_LENGTH = MEM_LENGTH,
    pe_0.PE_NAME = "PE_0";

// PE instruction fields and bit widths:
//   mb_addr       mb_srce  mb_d_mem  mb_d_in2  mb_d_in1  src_reg ...
//   (ADDR_WIDTH)  1        1         1         1         4
//   reg_res  dest_reg  out_res  carry  alu_dis  alu_dis_s  mb_dis  mb_dis_s
//   8        4         8        2      1        1          1       1
parameter // PE instruction field values:
    // mb_addr: memory bus transfer address
    // mb_srce: source of memory bus transfer is ...
    OUT = 1'b0,     // ... register "OUT"
    MEM = 1'b1,     // ... memory
    // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
    F = 1'b0,       // false
    T = 1'b1,       // true
    // srce_reg & dest_reg: source and destination registers of ALU operations
    R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
    R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
    R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
    RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,
    // reg_res & out_res: ALU operands for "OUT" and destination register results
    // NOTE(review): as encoded, XOR12S equals (OR12S & NAND12S), not
    // IN1 ^ IN2 ^ SRC (that bit pattern is SUM12S) -- confirm this is intended.
    IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010,
    NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101,
    ZEROS = 8'b00000000, ONES = 8'b11111111,
    AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100,
    NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011,
    AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010,
    NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101,
    AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110,
    NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001,
    AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110,
    NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
    SUM12S = 8'b10010110,
    // carry:
    // Note: The PE only does EITHER a carry operation OR an ALU operation in
    // one instruction cycle. A carry word is computed using registers IN_1
    // and IN_2 and the specified carry-in, and is made available at the input
    // from the router, r_in. The carry-out is placed in the carry flag of the
    // p_flags register.
    CN = 2'b00,     // do not compute carry (do an ALU operation)
    CC = 2'b01,     // compute carry using carry flag for carry-in
    C0 = 2'b10,     // compute carry using 0 for carry-in
    C1 = 2'b11;     // compute carry using 1 for carry-in
// alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
// alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

// convenience parameters and definitions for writing PE instructions:
`define M `ADDR_WIDTH'h // when specifying memory address (in hexadecimal)
parameter NMEM = { `M 0, MEM, F, F, F };        // when not using memory bus
parameter NALU = { R0, SRC, R0, ZEROS };        // when not doing ALU ops
                // (note that flags are affected and OUT is cleared)
parameter NDIS = { F, F, F, F };                // when not allowing any disabling
parameter NOOP = { NMEM, NALU, CN, NDIS };      // when squandering clock cycles
                // (note that flags are affected and OUT is cleared)

parameter // bit positions of PE flags
    // Note that flags other than carry are not affected by a carry operation,
    // and the carry flag is not affected by an ALU operation.
    REG_MSB_F = 9,  // m.s.b. of destination register
    REG_ZER_F = 8,  // if destination register = 0
    OUT_MSB_F = 7,  // m.s.b. of register OUT
    OUT_ZER_F = 6,  // if register OUT = 0
    IN2_MSB_F = 5,  // m.s.b. of register IN_2
    IN2_ZER_F = 4,  // if register IN_2 = 0
    IN1_MSB_F = 3,  // m.s.b. of register IN_1
    IN1_ZER_F = 2,  // if register IN_1 = 0
    DISABLE_F = 1,  // state of PE disable bit
    CARRY_F = 0;    // result of last carry operation

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!
//
// Each do(...) below issues exactly one PE clock cycle.  Both arms of every
// if/else issue the same number of cycles ("filler cycles"), so the total
// cycle count does not depend on the operand signs.
initial
  begin
    form_feed;
    $display("Division, Integer, Restoring, 2s-Complement, %0d-Bit, 1 PE", WORD_WIDTH);
    clock = 0;
    clock_ct = 0;
    dump_r_0 = 0;
    dump_m_0 = 0;
    pe_0.mem[0] = 32'h7fffffff;     // dividend A
    pe_0.mem[1] = 32'h00000003;     // divisor B
    // clear all registers
    reset_regs;
    // mem[0] -> in_2 (A)
    do({ `M 0, MEM, F, T, F, NALU, CN, NDIS });
    if (flags_0[IN2_MSB_F]) // A < 0
      begin
        // set negative quotient & remainder indicators, out = ~in_2
        neg_Q = 1;
        neg_R = 1;
        do({ NMEM, R0, SRC, R0, NIN2, CN, NDIS });
        // in_2 = out
        do({ `M 0, OUT, F, T, F, NALU, CN, NDIS });
        // compute carry for in_2 + 1
        do({ NMEM, NALU, C1, NDIS });
      end
    else // A >= 0
      begin
        // filler cycle
        neg_Q = 0;
        neg_R = 0;
        do({ NOOP });
        // filler cycle
        do({ NOOP });
        // compute carry for in_2 + 0
        do({ NMEM, NALU, C0, NDIS });
      end
    // R0 (A) = in_2 + c, mem[1] -> in_2 (B), save MSB of R0 (A), check div_0
    do({ `M 1, MEM, F, T, F, RR, SUM12S, R0, ZEROS, CN, NDIS });
    e_in = flags_0[REG_MSB_F];
    div_0 = flags_0[IN2_ZER_F];
    if (flags_0[IN2_MSB_F]) // B < 0
      begin
        // toggle negative quotient indicator
        // filler cycle (want negative operand for subtraction)
        neg_Q = ~neg_Q;
        do({ NOOP });
        // filler cycle
        do({ NOOP });
        // compute carry for in_2 + 0, clear out
        do({ NMEM, NALU, C0, NDIS });
      end
    else // B >= 0
      begin
        // out = ~in_2 (want negative operand for subtraction)
        do({ NMEM, R0, SRC, R0, NIN2, CN, NDIS });
        // in_2 = out
        do({ `M 0, OUT, F, T, F, NALU, CN, NDIS });
        // compute carry for in_2 + 1, clear out
        do({ NMEM, NALU, C1, NDIS });
      end
    // out (-B) = in_2 + c
    do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS });
    // out -> in_2 (-B)
    do({ `M 0, OUT, F, T, F, NALU, CN, NDIS });
    // one quotient bit per iteration, six clock cycles each
    repeat(WORD_WIDTH)
      begin
        // out (R) = east shifted left with MSB of A
        do({ NMEM, RE, ZEROS, RE, SRC, CN, NDIS });
        // out -> in_1 (R), east (A) = r0
        do({ `M 0, OUT, F, F, T, R0, SRC, RE, ZEROS, CN, NDIS });
        // compute carry for in_1 (R) + in_2 (-B)
        do({ NMEM, NALU, C0, NDIS });
        // out (R-B) = in_1 + in_2
        do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS });
        if (flags_0[OUT_MSB_F]) // R-B < 0
          begin
            // r0 (A) = east shifted left with 0
            e_in = 0;
            do({ NMEM, RE, SRC, R0, ZEROS, CN, NDIS });
            // save MSB of R0 (A), east (R) & out = in_1 (R)
            e_in = flags_0[REG_MSB_F];
            do({ NMEM, RR, IN1, RE, IN1, CN, NDIS });
          end
        else // R-B >= 0
          begin
            // r0 (A) = east shifted left with 1
            e_in = 1;
            do({ NMEM, RE, SRC, R0, ZEROS, CN, NDIS });
            // save MSB of R0 (A), east (R) & out = in_1 (R) + in_2 (-B)
            e_in = flags_0[REG_MSB_F];
            do({ NMEM, RR, SUM12S, RE, SUM12S, CN, NDIS });
          end
      end
    if (neg_R) // want negative remainder
      begin
        // out -> in_2, clear out
        do({ `M 3, OUT, F, T, F, NALU, CN, NDIS });
        // clear in_1, out = ~in_2
        do({ `M 3, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS });
        // out -> in_2
        do({ `M 3, OUT, F, T, F, NALU, CN, NDIS });
        // compute carry for in_2 + 1
        do({ NMEM, NALU, C1, NDIS });
        // out = in_2 + 1
        do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS });
        // out -> mem[3]
        do({ `M 3, OUT, T, F, F, NALU, CN, NDIS });
      end
    else // want positive remainder
      begin
        // out -> mem[3]
        do({ `M 3, OUT, T, F, F, NALU, CN, NDIS });
        // filler cycles
        repeat (5) do({ NOOP });
      end
    if (neg_Q) // want negative quotient
      begin
        // clear in_1, out = ~R0
        do({ `M 2, OUT, F, F, T, R0, SRC, R0, NSRC, CN, NDIS });
        // out -> in_2
        do({ `M 2, OUT, F, T, F, NALU, CN, NDIS });
        // compute carry for in_2 + 1
        do({ NMEM, NALU, C1, NDIS });
        // out = in_2 + 1
        do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS });
        // out -> mem[2]
        do({ `M 2, OUT, T, F, F, NALU, CN, NDIS });
      end
    else // want positive quotient
      begin
        // out = R0
        do({ NMEM, R0, SRC, R0, SRC, CN, NDIS });
        // out -> mem[2]
        do({ `M 2, OUT, T, F, F, NALU, CN, NDIS });
        // filler cycles
        repeat (3) do({ NOOP });
      end
    dump;
    // memory words are printed with %0d, i.e. as unsigned values
    $display("PE_0 unsigned: %0d / %0d = %0d, R %0d, div_0 %0d",
             pe_0.mem[0], pe_0.mem[1], pe_0.mem[2], pe_0.mem[3], div_0);
    $display("%0d clock cycles", clock_ct);
  end

// reset_regs: pulse the PE reset line high, then low (#NS before each edge).
task reset_regs;
  begin
    #NS reset = 1;
    #NS reset = 0;
  end
endtask

// do: present instruction i_0 on instr_0, pulse the PE clock once
// (#NS before each edge) and count the cycle in clock_ct.
// NOTE(review): "do" is a reserved word in SystemVerilog (IEEE 1800); this
// task name only compiles under pre-SystemVerilog Verilog tools.
task do;
  input [ADDR_WIDTH+34-1:0] i_0;
  begin
    instr_0 = i_0;
    #NS clock = 1;
    #NS clock = 0;
    clock_ct = clock_ct + 1;
  end
endtask

// dump: pulse the register-dump line, then the memory-dump line, of pe_0.
task dump;
  begin
    #NS dump_r_0 = 1;
    #NS dump_r_0 = 0;
    #NS dump_m_0 = 1;
    #NS dump_m_0 = 0;
  end
endtask

// form_feed: write character code octal 14 (ASCII form feed) to the output.
task form_feed;
  $write("\14");
endtask

endmodule
Host command: verilog Command arguments: pe.v pc08-.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:14:16 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. 
* Compiling source file "pe.v" Compiling source file "pc08-.v" Highest level modules: pe0 pe2 pe3 pe4 pc8 Division, Integer, Restoring, 2s-Complement, 32-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 213 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 0 0 1 1 0 0 0 disab carry (r) 0 0 IN1: 00000000000000000000000000000001 (r) IN2: 11111111111111111111111111111101 (r) OUT: 00000000000000000000000000000000 (r) R0: 00101010101010101010101010101010 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000010 (r) R_IN: 00000000000000000000000000000010 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000001 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 213 0: 7fffffff 00000003 2aaaaaaa 00000001 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE_0 unsigned: 2147483647 / 3 = 715827882, R 1, div_0 0 213 clock cycles 36 warnings 28296 simulation events CPU time: 1.3 secs to compile + 0.4 secs to link + 4.3 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:14:22
module pc9;
// pc09-.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 2's-complement integer sequential restoring divide with 2 PEs
// using fixed-(1-bit)-shift EAST & WEST registers
// (highest level module; requires module pe in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Summary: this testbench preloads pe_0.mem[0] (dividend A) and pe_1.mem[0]
// (divisor B), drives one instruction per PE per clock (instr_0 / instr_1,
// then task "clockem"), and finally stores the quotient in pe_0.mem[1] and
// the remainder in pe_1.mem[1].  The e_out of pe_0 feeds the e_in of pe_1
// through wire e0_e1.

// system parameters:
`define ADDR_WIDTH 4                // width of PE memory address, in bits
parameter
    WORD_WIDTH = 32,                // width of PE word, in bits
    MEM_LENGTH = 16,                // length of PE memory, in words
    MHZ = 16.7,                     // clock speed, in "megahertz"
    NS = 500/MHZ,                   // clock half-period, in "nanoseconds"
    ADDR_WIDTH = `ADDR_WIDTH;       // Had to use a "`define" above to
                                    // circumnavigate a Verilog-XL bug that
                                    // prevents parameters from working as
                                    // bit length specifiers!!!

// variable declarations:
reg clock, reset, dump_r_0, dump_m_0, dump_r_1, dump_m_1,
    A_neg, B_neg, neg_Q, neg_R, e_in_0, div_0;
reg [ADDR_WIDTH+34-1:0] instr_0, instr_1;   // one instruction per PE
wire e0_e1;                                 // pe_0 e_out -> pe_1 e_in
wire [9:0] flags_0, flags_1;                // PE flags, indexed by *_F parameters
integer clock_ct;                           // number of PE clock cycles issued

// PE instances and connections:
// (positional connections; an empty position leaves that port unconnected.
//  NOTE(review): port order is presumably that of module pe0 in pe.v --
//  confirm against module pe.)
pe pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, 0, 0, , , e0_e1, , ,
         0,,,,, dump_r_0, dump_m_0);
pe pe_1 (clock, reset, instr_1, flags_1, 0, 0, e0_e1, 0, 0, , , , , ,
         0,,,,, dump_r_1, dump_m_1);
defparam // set PE-instance parameters to system parameters:
    pe_0.ADDR_WIDTH = ADDR_WIDTH,
    pe_0.WORD_WIDTH = WORD_WIDTH,
    pe_0.MEM_LENGTH = MEM_LENGTH,
    pe_0.PE_NAME = "PE_0",
    pe_1.ADDR_WIDTH = ADDR_WIDTH,
    pe_1.WORD_WIDTH = WORD_WIDTH,
    pe_1.MEM_LENGTH = MEM_LENGTH,
    pe_1.PE_NAME = "PE_1";

// PE instruction fields and bit widths:
//   mb_addr       mb_srce  mb_d_mem  mb_d_in2  mb_d_in1  src_reg ...
//   (ADDR_WIDTH)  1        1         1         1         4
//   reg_res  dest_reg  out_res  carry  alu_dis  alu_dis_s  mb_dis  mb_dis_s
//   8        4         8        2      1        1          1       1
parameter // PE instruction field values:
    // mb_addr: memory bus transfer address
    // mb_srce: source of memory bus transfer is ...
    OUT = 1'b0,     // ... register "OUT"
    MEM = 1'b1,     // ... memory
    // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
    F = 1'b0,       // false
    T = 1'b1,       // true
    // srce_reg & dest_reg: source and destination registers of ALU operations
    R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
    R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
    R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
    RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,
    // reg_res & out_res: ALU operands for "OUT" and destination register results
    // NOTE(review): as encoded, XOR12S equals (OR12S & NAND12S), not
    // IN1 ^ IN2 ^ SRC (that bit pattern is SUM12S) -- confirm this is intended.
    IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010,
    NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101,
    ZEROS = 8'b00000000, ONES = 8'b11111111,
    AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100,
    NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011,
    AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010,
    NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101,
    AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110,
    NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001,
    AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110,
    NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
    SUM12S = 8'b10010110,
    // carry:
    // Note: The PE only does EITHER a carry operation OR an ALU operation in
    // one instruction cycle. A carry word is computed using registers IN_1
    // and IN_2 and the specified carry-in, and is made available at the input
    // from the router, r_in. The carry-out is placed in the carry flag of the
    // p_flags register.
    CN = 2'b00,     // do not compute carry (do an ALU operation)
    CC = 2'b01,     // compute carry using carry flag for carry-in
    C0 = 2'b10,     // compute carry using 0 for carry-in
    C1 = 2'b11;     // compute carry using 1 for carry-in
// alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
// alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

// convenience parameters and definitions for writing PE instructions:
`define M `ADDR_WIDTH'h // when specifying memory address (in hexadecimal)
parameter NMEM = { `M 0, MEM, F, F, F };        // when not using memory bus
parameter NALU = { R0, SRC, R0, ZEROS };        // when not doing ALU ops
                // (note that flags are affected and OUT is cleared)
parameter NDIS = { F, F, F, F };                // when not allowing any disabling
parameter NOOP = { NMEM, NALU, CN, NDIS };      // when squandering clock cycles
                // (note that flags are affected and OUT is cleared)

parameter // bit positions of PE flags
    // Note that flags other than carry are not affected by a carry operation,
    // and the carry flag is not affected by an ALU operation.
    REG_MSB_F = 9,  // m.s.b. of destination register
    REG_ZER_F = 8,  // if destination register = 0
    OUT_MSB_F = 7,  // m.s.b. of register OUT
    OUT_ZER_F = 6,  // if register OUT = 0
    IN2_MSB_F = 5,  // m.s.b. of register IN_2
    IN2_ZER_F = 4,  // if register IN_2 = 0
    IN1_MSB_F = 3,  // m.s.b. of register IN_1
    IN1_ZER_F = 2,  // if register IN_1 = 0
    DISABLE_F = 1,  // state of PE disable bit
    CARRY_F = 0;    // result of last carry operation

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!
//
// In the comments below "0:" describes the instruction given to pe_0 and
// "1:" the instruction given to pe_1; each clockem issues one clock cycle
// to both PEs.
initial
  begin
    form_feed;
    $display("Division, Integer, Restoring, 2s-Complement, %0d-Bit, 2 PEs", WORD_WIDTH);
    clock = 0;
    clock_ct = 0;
    dump_r_0 = 0;
    dump_m_0 = 0;
    dump_r_1 = 0;
    dump_m_1 = 0;
    pe_0.mem[0] = 32'h7fffffff;     // dividend A
    pe_1.mem[0] = 32'h00000003;     // divisor B
    // clear all registers
    reset_regs;
    // 0: mem[0] -> in_2 (A)
    // 1: mem[0] -> in_2 (B)
    instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
    instr_1 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
    clockem;
    // 0: if in_2 (A) < 0, then...
    // 1: check div_0; if in_2 (B) >= 0, then...
    // both: out = ~in_2, in_2 = out, compute carry for in_2 + 1;
    // otherwise (A >= 0, or B < 0), compute carry for in_2 + 0
    div_0 = flags_1[IN2_ZER_F];
    A_neg = flags_0[IN2_MSB_F];
    B_neg = flags_1[IN2_MSB_F];
    neg_Q = A_neg ^ B_neg;
    neg_R = A_neg;
    if (A_neg) instr_0 = { NMEM, R0, SRC, R0, NIN2, CN, NDIS };
    else instr_0 = { NOOP };
    if (B_neg) instr_1 = { NOOP };
    else instr_1 = { NMEM, R0, SRC, R0, NIN2, CN, NDIS };
    clockem;
    if (A_neg) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    else instr_0 = { NOOP };
    if (B_neg) instr_1 = { NOOP };
    else instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    clockem;
    if (A_neg) instr_0 = { NMEM, NALU, C1, NDIS };
    else instr_0 = { NMEM, NALU, C0, NDIS };
    if (B_neg) instr_1 = { NMEM, NALU, C0, NDIS };
    else instr_1 = { NMEM, NALU, C1, NDIS };
    clockem;
    // 0: east (A) = in_2 + c
    // 1: out (-B) = in_2 + c
    instr_0 = { NMEM, RR, SUM12S, RE, ZEROS, CN, NDIS };
    instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
    clockem;
    // 0: do nothing
    // 1: out -> in_2 (-B)
    instr_0 = { NOOP };
    instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
    clockem;
    // one quotient bit per iteration, five clock cycles each
    repeat(WORD_WIDTH)
      begin
        // 0: do nothing
        // 1: out (R) = east shifted left with MSB of pe_0.east (A,Q)
        instr_0 = { NOOP };
        instr_1 = { NMEM, RE, ZEROS, RE, SRC, CN, NDIS };
        clockem;
        // 0: do nothing (instr_0 is still NOOP)
        // 1: out -> in_1 (R)
        instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
        clockem;
        // 0: do nothing
        // 1: compute carry for in_1 (R) + in_2 (-B)
        instr_1 = { NMEM, NALU, C0, NDIS };
        clockem;
        // 0: do nothing
        // 1: out (R-B) = in_1 + in_2
        instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
        clockem;
        if (flags_1[OUT_MSB_F]) // R-B < 0
          begin
            // 0: east (Q,A) & out = east shifted left with 0
            // 1: east (R) & out = in_1 (R)
            e_in_0 = 0;
            instr_0 = { NMEM, RE, SRC, RE, SRC, CN, NDIS };
            instr_1 = { NMEM, RR, IN1, RE, IN1, CN, NDIS };
            clockem;
          end
        else // R-B >= 0
          begin
            // 0: east (Q,A) & out = east shifted left with 1
            // 1: east (R) & out = in_1 (R) + in_2 (-B)
            e_in_0 = 1;
            instr_0 = { NMEM, RE, SRC, RE, SRC, CN, NDIS };
            instr_1 = { NMEM, RR, SUM12S, RE, SUM12S, CN, NDIS };
            clockem;
          end
      end
    // 0: if want negative quotient, then...
    // 1: if want negative remainder, then...
    // both: out -> in_2, clear out, clear in_1, out = ~in_2, out -> in_2,
    // compute carry for in_2 + 1, out = in_2 + 1, out -> mem[1];
    // otherwise, out -> mem[1]
    if (neg_Q) instr_0 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
    else instr_0 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
    if (neg_R) instr_1 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
    else instr_1 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
    clockem;
    if (neg_Q) instr_0 = { `M 1, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS };
    else instr_0 = { NOOP };
    if (neg_R) instr_1 = { `M 1, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS };
    else instr_1 = { NOOP };
    clockem;
    if (neg_Q) instr_0 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
    else instr_0 = { NOOP };
    if (neg_R) instr_1 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
    else instr_1 = { NOOP };
    clockem;
    if (neg_Q) instr_0 = { NMEM, NALU, C1, NDIS };
    else instr_0 = { NOOP };
    if (neg_R) instr_1 = { NMEM, NALU, C1, NDIS };
    else instr_1 = { NOOP };
    clockem;
    if (neg_Q) instr_0 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
    else instr_0 = { NOOP };
    if (neg_R) instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
    else instr_1 = { NOOP };
    clockem;
    if (neg_Q) instr_0 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
    else instr_0 = { NOOP };
    if (neg_R) instr_1 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
    else instr_1 = { NOOP };
    clockem;
    dump;
    // memory words are printed with %0d, i.e. as unsigned values
    $display("Unsigned: %0d / %0d = %0d, R %0d, div_0 %0d",
             pe_0.mem[0], pe_1.mem[0], pe_0.mem[1], pe_1.mem[1], div_0);
    $display("%0d clock cycles", clock_ct);
  end

// reset_regs: pulse the shared PE reset line high, then low
// (#NS before each edge).
task reset_regs;
  begin
    #NS reset = 1;
    #NS reset = 0;
  end
endtask

// clockem: pulse the shared PE clock once (#NS before each edge) and count
// the cycle in clock_ct.  instr_0 / instr_1 must be set by the caller first.
task clockem;
  begin
    #NS clock = 1;
    #NS clock = 0;
    clock_ct = clock_ct + 1;
  end
endtask

// dump: pulse the register-dump and memory-dump lines of pe_0, emit a form
// feed, then do the same for pe_1.
task dump;
  begin
    #NS dump_r_0 = 1;
    #NS dump_r_0 = 0;
    #NS dump_m_0 = 1;
    #NS dump_m_0 = 0;
    form_feed;
    #NS dump_r_1 = 1;
    #NS dump_r_1 = 0;
    #NS dump_m_1 = 1;
    #NS dump_m_1 = 0;
  end
endtask

// form_feed: write character code octal 14 (ASCII form feed) to the output.
task form_feed;
  $write("\14");
endtask

endmodule
Host command: verilog Command arguments: pe.v pc09-.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:14:28 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc09-.v" Highest level modules: pe0 pe2 pe3 pe4 pc9 Division, Integer, Restoring, 2s-Complement, 32-Bit, 2 PEs PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 172 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 00000000000000000000000000000000 (r) IN2: 01111111111111111111111111111111 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 
00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00101010101010101010101010101010 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 172 0: 7fffffff 2aaaaaaa xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_1" Port and Register Dump # 0, Clock Cycle # 172 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 1 0 0 0 disab carry (r) 0 0 IN1: 00000000000000000000000000000001 (r) IN2: 11111111111111111111111111111101 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000010 (r) R_IN: 00000000000000000000000000000010 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000001 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_1" 
Memory Dump # 0, Clock Clycle # 172 0: 00000003 00000001 xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx Unsigned: 2147483647 / 3 = 715827882, R 1, div_0 0 172 clock cycles 38 warnings 44890 simulation events CPU time: 1.3 secs to compile + 0.5 secs to link + 6.8 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:14:37 module pc10; // pc10-.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 2's-complement integer sequential non-restoring divide with 1 PE // (using fixed-(1-bit)-shift EAST & WEST registers) // (highest level module; requires module pe in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define ADDR_WIDTH 4 // width of PE memory address, in bits parameter WORD_WIDTH = 32, // width of PE word, in bits MEM_LENGTH = 16, // length of PE memory, in words MHZ = 16.7, // clock speed, in "megahertz" NS = 500/MHZ, // clock half-period, in "nanoseconds" ADDR_WIDTH = `ADDR_WIDTH; // Had to use a "`define" above to // circumnavigate a Verilog-XL bug that // prevents parameters from working as // bit length specifiers!!! // variable declarations: reg clock, reset, dump_r_0, dump_m_0, B_neg, neg_Q, neg_R, e_in, div_0; reg [ADDR_WIDTH+34-1:0] instr_0; wire e_out; wire [9:0] flags_0; integer clock_ct; // PE instances and connections: pe pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in, 0, 0, , , e_out, , , 0,,,,, dump_r_0, dump_m_0); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = ADDR_WIDTH, pe_0.WORD_WIDTH = WORD_WIDTH, pe_0.MEM_LENGTH = MEM_LENGTH, pe_0.PE_NAME = "PE_0"; // PE instruction fields and bit widths: // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 src_reg ... 
// (ADDR_WIDTH) 1 1 1 1 4 // reg_res dest_reg out_res carry alu_dis alu_dis_s mb_dis mb_dis_s // 8 4 8 2 1 1 1 1 parameter // PE instruction field values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // reg_res & out_res: ALU operands for "OUT" and destination register results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, // carry: // Note: The PE only does EITHER a carry operation OR an ALU operation in // one instruction cycle. A carry word is computed using registers IN_1 // and IN_2 and the specified carry-in, and is made available at the input // from the router, r_in. The carry-out is placed in the carry flag of the // p_flags register. 
CN = 2'b00, // do not compute carry (do an ALU operation) CC = 2'b01, // compute carry using carry flag for carry-in C0 = 2'b10, // compute carry using 0 for carry-in C1 = 2'b11; // compute carry using 1 for carry-in // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F // convenience parameters and definitions for writing PE instructions: `define M `ADDR_WIDTH'h // when specifying memory address (in hexidecimal) parameter NMEM = { `M 0, MEM, F, F, F }; // when not using memory bus parameter NALU = { R0, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, CN, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) parameter // bit positions of PE flags // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN_2 IN2_ZER_F = 4, // if register IN_2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN_1 IN1_ZER_F = 2, // if register IN_1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // result of last carry operation // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. 
Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! initial begin form_feed; $display("Division, Integer, Non-Restoring, 2s-Complement, %0d-Bit, 1 PE", WORD_WIDTH); clock = 0; clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; pe_0.mem[0] = 32'h7fffffff; pe_0.mem[1] = 32'h00000003; // clear all registers reset_regs; // mem[0] -> in_2 (A) do({ `M 0, MEM, F, T, F, NALU, CN, NDIS }); if (flags_0[IN2_MSB_F]) // in_2 < 0 begin // set negative quotient & remainder indicators, out = ~in_2 neg_Q = 1; neg_R = 1; do({ NMEM, R0, SRC, R0, NIN2, CN, NDIS }); // in_2 = out do({ `M 0, OUT, F, T, F, NALU, CN, NDIS }); // compute carry for in_2 + 1 do({ NMEM, NALU, C1, NDIS }); end else // in_2 >= 0 begin // filler cycle neg_Q = 0; neg_R = 0; do({ NOOP }); // filler cycle do({ NOOP }); // compute carry for in_2 + 0 do({ NMEM, NALU, C0, NDIS }); end // R0 (A) = in_2 + c, mem[1] -> in_2 (B), save MSB of R0 (A) do({ `M 1, MEM, F, T, F, RR, SUM12S, R0, ZEROS, CN, NDIS }); e_in = flags_0[REG_MSB_F]; // if B < 0, Q must switch sign; if B = 0 set div_0; out (B) = in_2 if (flags_0[IN2_MSB_F]) neg_Q = ~neg_Q; div_0 = flags_0[IN2_ZER_F]; do({ NMEM, R0, SRC, R0, IN2, CN, NDIS }); // save sign of B // if B < 0, out -> mem[3] (-|B|), // else B >= 0, out -> mem[2] (+|B|); // out (B) = ~in_2 B_neg = flags_0[OUT_MSB_F]; if (B_neg) do({ `M 3, OUT, T, F, F, R0, SRC, R0, NIN2, CN, NDIS }); else do({ `M 2, OUT, T, F, F, R0, SRC, R0, NIN2, CN, NDIS }); // in_2 (B) = out do({ `M 0, OUT, F, T, F, NALU, CN, NDIS }); // compute carry for in_2 + 1 do({ NMEM, NALU, C1, NDIS }); // out (-B) = in_2 + c do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS }); // if original B < 0, out -> mem[2] (+|B|), // else original B >= 0, out -> mem[3] (-|B|) if (B_neg) do({ `M 2, OUT, T, F, F, NALU, CN, NDIS }); else do({ `M 3, OUT, T, F, F, NALU, CN, NDIS }); repeat(WORD_WIDTH) begin // if east (R) < 0, mem[2] -> in_2 (+|B|)... 
// else R >= 0, mem[3] -> in_2 (-|B|)... // out (R) = east shifted left with MSB of A if (e_out) do({ `M 2, MEM, F, T, F, RE, ZEROS, RE, SRC, CN, NDIS }); else do({ `M 3, MEM, F, T, F, RE, ZEROS, RE, SRC, CN, NDIS }); // out -> in_1 (R), east (A) = r0 do({ `M 0, OUT, F, F, T, R0, SRC, RE, ZEROS, CN, NDIS }); // compute carry for in_1 (R) + in_2 (+/-B) do({ NMEM, NALU, C0, NDIS }); // out = in_1 (R) + in_2 (+/-B) do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS }); // if msb(out) (R < 0), r0 (A) = east shifted left with 0... // otherwise R >= 0, r0 (A) = east shifted left with 1... // out -> in_1 (R +/- B), save new MSB of R0 (A) e_in = !flags_0[OUT_MSB_F]; do({ `M 0, OUT, F, F, T, RE, SRC, R0, ZEROS, CN, NDIS }); e_in = flags_0[REG_MSB_F]; // mem[2] -> in_2 (+|B|), east (R) & out = in_1 (R +/- B) do({ `M 2, MEM, F, T, F, RE, IN1, RE, IN1, CN, NDIS }); end if (flags_0[OUT_MSB_F]) // out (R) < 0 begin // R = R + B // out -> in_1 do({ `M 0, OUT, F, F, T, NALU, CN, NDIS }); // compute carry for in_1 (R) + in_2 (+B) do({ NMEM, NALU, C0, NDIS }); // out = in_1 (R) + in_2 (+B) do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS }); end else // out (R) >= 0 begin // do nothing // out -> in_1 do({ `M 0, OUT, F, F, T, NALU, CN, NDIS }); // do nothing do({ NOOP }); // out = in_1 do({ NMEM, R0, SRC, R0, IN1, CN, NDIS }); end if (neg_R) // want negative remainder begin // out -> in_2, clear out do({ `M 3, OUT, F, T, F, NALU, CN, NDIS }); // clear in_1, out = ~in_2 do({ `M 3, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS }); // out -> in_2 do({ `M 3, OUT, F, T, F, NALU, CN, NDIS }); // compute carry for in_2 + 1 do({ NMEM, NALU, C1, NDIS }); // out = in_2 + 1 do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS }); // out -> mem[3] do({ `M 3, OUT, T, F, F, NALU, CN, NDIS }); end else // want positive remainder begin // out -> mem[3] do({ `M 3, OUT, T, F, F, NALU, CN, NDIS }); // filler cycles repeat (5) do({ NOOP }); end if (neg_Q) // want negative quotient begin // clear in_1, out = ~R0 do({ `M 2, OUT, F, F, T, 
R0, SRC, R0, NSRC, CN, NDIS }); // out -> in_2 do({ `M 2, OUT, F, T, F, NALU, CN, NDIS }); // compute carry for in_2 + 1 do({ NMEM, NALU, C1, NDIS }); // out = in_2 + 1 do({ NMEM, RR, SRC, RR, SUM12S, CN, NDIS }); // out -> mem[2] do({ `M 2, OUT, T, F, F, NALU, CN, NDIS }); end else // want positive quotient begin // out = R0 do({ NMEM, R0, SRC, R0, SRC, CN, NDIS }); // out -> mem[2] do({ `M 2, OUT, T, F, F, NALU, CN, NDIS }); // filler cycles repeat (3) do({ NOOP }); end dump; $display("PE_0 unsigned: %0d / %0d = %0d, R %0d, div_0 %0d", pe_0.mem[0], pe_0.mem[1], pe_0.mem[2], pe_0.mem[3], div_0); $display("%0d clock cycles", clock_ct); end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task do; input [ADDR_WIDTH+34-1:0] i_0; begin instr_0 = i_0; #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc10-.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:14:43 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc10-.v" Warning! Port sizes differ in port connection (port 7) [Verilog-PCDPC] "pc10-.v", 30: 0 Warning! 
Port sizes differ in port connection (port 14) [Verilog-PCDPC] "pc10-.v", 30: 0 Highest level modules: pe0 pe2 pe3 pe4 pc10 Division, Integer, Non-Restoring, 2s-Complement, 32-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 217 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 0 0 1 0 0 1 0 disab carry (r) 0 1 IN1: 11111111111111111111111111111110 (r) IN2: 00000000000000000000000000000011 (r) OUT: 00000000000000000000000000000000 (r) R0: 00101010101010101010101010101010 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 11111111111111111111111111111100 (r) R_IN: 11111111111111111111111111111100 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 11111111111111111111111111111110 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 217 0: 7fffffff 00000003 2aaaaaaa 00000001 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE_0 unsigned: 2147483647 / 3 = 715827882, R 1, div_0 0 217 clock cycles 36 warnings 29291 simulation events CPU time: 1.3 secs to compile + 0.4 secs to link + 4.4 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:14:49

module pc11;

// pc11-.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 2's-complement integer sequential non-restoring divide with 2 PEs
// using fixed-(1-bit)-shift EAST & WEST registers
// (highest level module; requires module pe in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Memory usage of the program in the "initial" block below:
//   PE_0: mem[0] = dividend (A),  mem[1] = quotient
//   PE_1: mem[0] = divisor  (B),  mem[1] = remainder,
//         mem[2] = +|B|,          mem[3] = -|B|
// The two PEs are chained through their 1-bit east ports: PE_0's e_out
// (wire e0_e1) drives PE_1's e_in, and the test bench drives PE_0's e_in
// from register e_in_0.

// system parameters:

`define ADDR_WIDTH 4 // width of PE memory address, in bits

parameter
  WORD_WIDTH = 32,          // width of PE word, in bits
  MEM_LENGTH = 16,          // length of PE memory, in words
  MHZ        = 16.7,        // clock speed, in "megahertz"
  NS         = 500/MHZ,     // clock half-period, in "nanoseconds"
  ADDR_WIDTH = `ADDR_WIDTH; // Had to use a "`define" above to
                            // circumnavigate a Verilog-XL bug that
                            // prevents parameters from working as
                            // bit length specifiers!!!

// variable declarations:

reg clock, reset, dump_r_0, dump_m_0, dump_r_1, dump_m_1,
    A_neg, B_neg, neg_Q, neg_R, e_in_0, div_0;
reg [ADDR_WIDTH+34-1:0] instr_0, instr_1;
wire e0_e1, e_out_1;
wire [9:0] flags_0, flags_1;
integer clock_ct;

// PE instances and connections:

pe pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, 0, 0, , , e0_e1, , , 0,,,,, dump_r_0, dump_m_0);
pe pe_1 (clock, reset, instr_1, flags_1, 0, 0, e0_e1, 0, 0, , , e_out_1, , , 0,,,,, dump_r_1, dump_m_1);

defparam // set PE-instance parameters to system parameters:
  pe_0.ADDR_WIDTH = ADDR_WIDTH,
  pe_0.WORD_WIDTH = WORD_WIDTH,
  pe_0.MEM_LENGTH = MEM_LENGTH,
  pe_0.PE_NAME = "PE_0",
  pe_1.ADDR_WIDTH = ADDR_WIDTH,
  pe_1.WORD_WIDTH = WORD_WIDTH,
  pe_1.MEM_LENGTH = MEM_LENGTH,
  pe_1.PE_NAME = "PE_1";

// PE instruction fields and bit widths (34 bits plus the address):
//
//   mb_addr       mb_srce  mb_d_mem  mb_d_in2  mb_d_in1  srce_reg ...
//   (ADDR_WIDTH)  1        1         1         1         4
//
//   reg_res  dest_reg  out_res  carry  alu_dis  alu_dis_i  mb_dis  mb_dis_i
//   8        4         8        2      1        1          1       1

parameter // PE instruction field values:

  // mb_addr: memory bus transfer address

  // mb_srce: source of memory bus transfer is ...
  OUT = 1'b0, // ... register "OUT"
  MEM = 1'b1, // ... memory

  // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
  F = 1'b0, // false
  T = 1'b1, // true

  // srce_reg & dest_reg: source and destination registers of ALU operations
  R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
  R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
  R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
  RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,

  // reg_res & out_res: ALU operands for "OUT" and destination register results
  IN1    = 8'b11110000, IN2    = 8'b11001100, SRC    = 8'b10101010,
  NIN1   = 8'b00001111, NIN2   = 8'b00110011, NSRC   = 8'b01010101,
  ZEROS  = 8'b00000000, ONES   = 8'b11111111,
  AND12  = 8'b11000000, OR12   = 8'b11111100, XOR12  = 8'b00111100,
  NAND12 = 8'b00111111, NOR12  = 8'b00000011, NXOR12 = 8'b11000011,
  AND1S  = 8'b10100000, OR1S   = 8'b11111010, XOR1S  = 8'b01011010,
  NAND1S = 8'b01011111, NOR1S  = 8'b00000101, NXOR1S = 8'b10100101,
  AND2S  = 8'b10001000, OR2S   = 8'b11101110, XOR2S  = 8'b01100110,
  NAND2S = 8'b01110111, NOR2S  = 8'b00010001, NXOR2S = 8'b10011001,
  AND12S  = 8'b10000000, OR12S  = 8'b11111110, XOR12S  = 8'b01111110,
  NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
  SUM12S  = 8'b10010110,

  // carry:
  // Note: The PE only does EITHER a carry operation OR an ALU operation in
  // one instruction cycle. A carry word is computed using registers IN_1
  // and IN_2 and the specified carry-in, and is made available at the input
  // from the router, r_in. The carry-out is placed in the carry flag of the
  // p_flags register.
  CN = 2'b00, // do not compute carry (do an ALU operation)
  CC = 2'b01, // compute carry using carry flag for carry-in
  C0 = 2'b10, // compute carry using 0 for carry-in
  C1 = 2'b11; // compute carry using 1 for carry-in

  // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
  // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

// convenience parameters and definitions for writing PE instructions:

`define M `ADDR_WIDTH'h // when specifying memory address (in hexadecimal)

parameter NMEM = { `M 0, MEM, F, F, F }; // when not using memory bus
parameter NALU = { R0, SRC, R0, ZEROS }; // when not doing ALU ops
  // (note that flags are affected and OUT is cleared)
parameter NDIS = { F, F, F, F }; // when not allowing any disabling
parameter NOOP = { NMEM, NALU, CN, NDIS }; // when squandering clock cycles
  // (note that flags are affected and OUT is cleared)

parameter // bit positions of PE flags
  // Note that flags other than carry are not affected by a carry operation,
  // and the carry flag is not affected by an ALU operation.
  REG_MSB_F = 9, // m.s.b. of destination register
  REG_ZER_F = 8, // if destination register = 0
  OUT_MSB_F = 7, // m.s.b. of register OUT
  OUT_ZER_F = 6, // if register OUT = 0
  IN2_MSB_F = 5, // m.s.b. of register IN_2
  IN2_ZER_F = 4, // if register IN_2 = 0
  IN1_MSB_F = 3, // m.s.b. of register IN_1
  IN1_ZER_F = 2, // if register IN_1 = 0
  DISABLE_F = 1, // state of PE disable bit
  CARRY_F   = 0; // result of last carry operation

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!

initial
begin
  form_feed;
  $display("Division, Integer, Non-Restoring, 2s-Complement, %0d-Bit, 2 PEs", WORD_WIDTH);
  clock = 0; clock_ct = 0;
  dump_r_0 = 0; dump_m_0 = 0;
  dump_r_1 = 0; dump_m_1 = 0;
  // e_in_0 drives PE_0's e_in port from the first clock on, but is not
  // otherwise assigned until the divide loop; give it a defined value so
  // no "don't know" is presented to the PE (see the note above).
  e_in_0 = 0;
  pe_0.mem[0] = 32'h7fffffff;
  pe_1.mem[0] = 32'h00000003;

  // clear all registers
  reset_regs;

  // 0: mem[0] -> in_2 (A)
  // 1: mem[0] -> in_2 (B)
  instr_0 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
  instr_1 = { `M 0, MEM, F, T, F, NALU, CN, NDIS };
  clockem;

  // set up conditions
  // 0: do nothing
  // 1: out = in_2 (B)
  div_0 = flags_1[IN2_ZER_F];
  A_neg = flags_0[IN2_MSB_F];
  B_neg = flags_1[IN2_MSB_F];
  neg_Q = A_neg ^ B_neg;
  neg_R = A_neg;
  instr_0 = { NOOP };
  instr_1 = { NMEM, R0, SRC, R0, IN2, CN, NDIS };
  clockem;

  // 0: if A < 0, out = ~in_2, out->in_2, carry(in_2 + 1), east = in_2 + 1
  //    else A >= 0, carry(in_2 + 0), east = in_2 + 0
  // 1: if B < 0, out -> mem[3]...
  //    else B >= 0, out -> mem[2]...
  //    out = ~in_2, out -> in_2, carry(in_2 + 1), out = in_2 + 1
  if (A_neg) instr_0 = { NMEM, R0, SRC, R0, NIN2, CN, NDIS };
  else       instr_0 = { NOOP };
  if (B_neg) instr_1 = { `M 3, OUT, T, F, F, R0, SRC, R0, NIN2, CN, NDIS };
  else       instr_1 = { `M 2, OUT, T, F, F, R0, SRC, R0, NIN2, CN, NDIS };
  clockem;

  if (A_neg) instr_0 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  else       instr_0 = { NOOP };
  instr_1 = { `M 0, OUT, F, T, F, NALU, CN, NDIS };
  clockem;

  if (A_neg) instr_0 = { NMEM, NALU, C1, NDIS };
  else       instr_0 = { NMEM, NALU, C0, NDIS };
  instr_1 = { NMEM, NALU, C1, NDIS };
  clockem;

  instr_0 = { NMEM, RR, SUM12S, RE, ZEROS, CN, NDIS };
  instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
  clockem;

  // 0: do nothing
  // 1: if original B < 0, out -> mem[2] (+|B|),
  //    otherwise orig B >= 0, out -> mem[3] (-|B|)
  instr_0 = { NOOP };
  if (B_neg) instr_1 = { `M 2, OUT, T, F, F, NALU, CN, NDIS };
  else       instr_1 = { `M 3, OUT, T, F, F, NALU, CN, NDIS };
  clockem;

  repeat(WORD_WIDTH)
  begin
    // 0: do nothing
    // 1: if east (R) < 0, mem[2] -> in_2 (+|B|)...
    //    else R >= 0, mem[3] -> in_2 (-|B|)...
    //    out (R) = east shifted left with MSB of A
    instr_0 = { NOOP };
    if (e_out_1) instr_1 = { `M 2, MEM, F, T, F, RE, ZEROS, RE, SRC, CN, NDIS };
    else         instr_1 = { `M 3, MEM, F, T, F, RE, ZEROS, RE, SRC, CN, NDIS };
    clockem;

    // 0: do nothing
    // 1: out -> in_1 (R)
    instr_0 = { NOOP };
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;

    // 0: do nothing
    // 1: compute carry for in_1 (R) + in_2 (+/-B)
    instr_0 = { NOOP };
    instr_1 = { NMEM, NALU, C0, NDIS };
    clockem;

    // 0: do nothing
    // 1: mem[2] (+B) -> in_2, east & out = in_1 (R) + in_2 (+/-B)
    instr_0 = { NOOP };
    instr_1 = { `M 2, MEM, F, T, F, RR, SUM12S, RE, SUM12S, CN, NDIS };
    clockem;

    // 0: east (A) & out = east shifted left with !(sign of new R)
    // 1: out -> in_1
    e_in_0 = !flags_1[OUT_MSB_F];
    instr_0 = { NMEM, RE, SRC, RE, SRC, CN, NDIS };
    instr_1 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    clockem;
  end

  // The last loop step moved PE_1's OUT into IN_1 (and NALU cleared OUT),
  // so the sign of R is read from the IN_1 flag here.
  if (flags_1[IN1_MSB_F]) // in_1 (R) < 0
  begin // R = R + B
    // 0: out -> in_1
    // 1: compute carry for in_1 (R) + in_2 (+B)
    instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_1 = { NMEM, NALU, C0, NDIS };
    clockem;

    // 0: out = in_1
    // 1: out = in_1 (R) + in_2 (+B)
    instr_0 = { NMEM, R0, SRC, R0, IN1, CN, NDIS };
    instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
    clockem;
  end
  else // in_1 (R) >= 0
  begin // do nothing
    // 0: out -> in_1
    // 1: do nothing
    instr_0 = { `M 0, OUT, F, F, T, NALU, CN, NDIS };
    instr_1 = { NOOP };
    clockem;

    // both: out = in_1
    instr_0 = { NMEM, R0, SRC, R0, IN1, CN, NDIS };
    instr_1 = instr_0;
    clockem;
  end

  // 0: if want negative quotient, then...
  // 1: if want negative remainder, then...
  // both: out -> in_2, clear out, clear in_1, out = ~in_2, out -> in_2,
  //       compute carry for in_2 + 1, out = in_2 + 1, out -> mem[1];
  //       otherwise, out -> mem[1]
  if (neg_Q) instr_0 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
  else       instr_0 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
  if (neg_R) instr_1 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
  else       instr_1 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
  clockem;

  if (neg_Q) instr_0 = { `M 1, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS };
  else       instr_0 = { NOOP };
  if (neg_R) instr_1 = { `M 1, OUT, F, F, T, R0, SRC, R0, NIN2, CN, NDIS };
  else       instr_1 = { NOOP };
  clockem;

  if (neg_Q) instr_0 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
  else       instr_0 = { NOOP };
  if (neg_R) instr_1 = { `M 1, OUT, F, T, F, NALU, CN, NDIS };
  else       instr_1 = { NOOP };
  clockem;

  if (neg_Q) instr_0 = { NMEM, NALU, C1, NDIS };
  else       instr_0 = { NOOP };
  if (neg_R) instr_1 = { NMEM, NALU, C1, NDIS };
  else       instr_1 = { NOOP };
  clockem;

  if (neg_Q) instr_0 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
  else       instr_0 = { NOOP };
  if (neg_R) instr_1 = { NMEM, RR, SRC, RR, SUM12S, CN, NDIS };
  else       instr_1 = { NOOP };
  clockem;

  if (neg_Q) instr_0 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
  else       instr_0 = { NOOP };
  if (neg_R) instr_1 = { `M 1, OUT, T, F, F, NALU, CN, NDIS };
  else       instr_1 = { NOOP };
  clockem;

  dump;
  $display("Unsigned: %0d / %0d = %0d, R %0d, div_0 %0d",
           pe_0.mem[0], pe_1.mem[0], pe_0.mem[1], pe_1.mem[1], div_0);
  $display("%0d clock cycles", clock_ct);
end

// Pulse the shared PE reset line high for one half-period.
task reset_regs;
begin
  #NS reset = 1;
  #NS reset = 0;
end
endtask

// Give both PEs one clock pulse (they execute instr_0 / instr_1)
// and count the cycle.
task clockem;
begin
  #NS clock = 1;
  #NS clock = 0;
  clock_ct = clock_ct + 1;
end
endtask

// Pulse the register-dump and memory-dump clocks of PE_0, then of PE_1,
// with a form feed between the two PEs' output.
task dump;
begin
  #NS dump_r_0 = 1;
  #NS dump_r_0 = 0;
  #NS dump_m_0 = 1;
  #NS dump_m_0 = 0;
  form_feed;
  #NS dump_r_1 = 1;
  #NS dump_r_1 = 0;
  #NS dump_m_1 = 1;
  #NS dump_m_1 = 0;
end
endtask

// Write an ASCII form feed (octal 14) to the simulation output.
task form_feed;
  $write("\14");
endtask

endmodule

Host command: verilog Command arguments: pe.v pc11-.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:14:54 * Copyright Cadence Design
Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc11-.v" Highest level modules: pe0 pe2 pe3 pe4 pc11 Division, Integer, Non-Restoring, 2s-Complement, 32-Bit, 2 PEs PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 175 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 00101010101010101010101010101010 (r) IN2: 01111111111111111111111111111111 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00101010101010101010101010101010 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 175 0: 7fffffff 2aaaaaaa xxxxxxxx xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE "PE_1" Port and Register Dump # 0, Clock Cycle # 175 PE_INSTR: mb_addr 0000 mb_srce mb_d_mem 
mb_d_in2 mb_d_in1 srce_reg reg_res dest_reg 1 0 0 0 0000 10101010 0000 out_res carry alu_dis alu_dis_i mb_dis mb_dis_i (p) 00000000 00 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 0 1 0 disab carry (r) 0 1 IN1: 11111111111111111111111111111110 (r) IN2: 00000000000000000000000000000011 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 11111111111111111111111111111100 (r) R_IN: 11111111111111111111111111111100 (p/r) NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 11111111111111111111111111111110 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_1" Memory Dump # 0, Clock Clycle # 175 0: 00000003 00000001 00000003 fffffffd 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx Unsigned: 2147483647 / 3 = 715827882, R 1, div_0 0 175 clock cycles 38 warnings 46137 simulation events CPU time: 1.4 secs to compile + 0.6 secs to link + 6.9 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:15:03

module pc12;

// pc12-3.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for an integer 2's complement full add/subtract with 1 PE
// (using complementation and variable-bit-shift EAST & WEST registers)
// (highest level module; requires module pe3 in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki

// system parameters:

`define WORD_WIDTH 32 // width of PE word, in bits
`define ADDR_WIDTH 4 // width of PE memory address, in bits
`define MEM_LENGTH 16 // length of PE memory, in words
`define MHZ 16.7 // clock speed, in "megahertz"

// "`define"s are used to circumnavigate a Verilog-XL bug that
// prevents "parameter"s from working as bit length specifiers!!!

// variable declarations:

reg clock, reset, dump_r_0, dump_m_0, subtract, savebit;
reg [`ADDR_WIDTH+44-1:0] instr_0;
wire [9:0] flags_0;
integer clock_ct;

// PE instances and connections:

pe3 pe_0 (clock, reset, instr_0, flags_0, 0, 0, 0, 0, 0, ,,,,, 0,,,,, dump_r_0, dump_m_0);

defparam // set PE-instance parameters to system parameters:
  pe_0.ADDR_WIDTH = `ADDR_WIDTH,
  pe_0.WORD_WIDTH = `WORD_WIDTH,
  pe_0.MEM_LENGTH = `MEM_LENGTH,
  pe_0.PE_NAME = "PE_0";

// PE instruction fields and bit widths (44 bits plus the address):
//
//   mb_addr        mb_srce  mb_d_mem  mb_d_in2  mb_d_in1 ...
//   (`ADDR_WIDTH)  1        1         1         1
//
//   srce_reg  shift  reg_res  dest_reg  out_res ...
//   4         6      8        4         8
//
//   car_in1  car_in2  car_nin1  car_nin2  car_srce  car_val ...
//   1        1        1         1         1         1
//
//   alu_dis  alu_dis_i  mb_dis  mb_dis_i
//   1        1          1       1

parameter // PE instruction field descriptions and values:

  // mb_addr: memory bus transfer address

  // mb_srce: source of memory bus transfer is ...
  OUT = 1'b0, // ... register "OUT"
  MEM = 1'b1, // ... memory

  // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
  F = 1'b0, // false
  T = 1'b1, // true

  // srce_reg & dest_reg: source and destination registers of ALU operations
  R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
  R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
  R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
  RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,

  // shift: shift east or west source register "shift" bits before using

  // reg_res & out_res: ALU operations for "OUT" and destination reg results
  IN1    = 8'b11110000, IN2    = 8'b11001100, SRC    = 8'b10101010,
  NIN1   = 8'b00001111, NIN2   = 8'b00110011, NSRC   = 8'b01010101,
  ZEROS  = 8'b00000000, ONES   = 8'b11111111,
  AND12  = 8'b11000000, OR12   = 8'b11111100, XOR12  = 8'b00111100,
  NAND12 = 8'b00111111, NOR12  = 8'b00000011, NXOR12 = 8'b11000011,
  AND1S  = 8'b10100000, OR1S   = 8'b11111010, XOR1S  = 8'b01011010,
  NAND1S = 8'b01011111, NOR1S  = 8'b00000101, NXOR1S = 8'b10100101,
  SUMN1S = 8'b10100101,
  AND2S  = 8'b10001000, OR2S   = 8'b11101110, XOR2S  = 8'b01100110,
  NAND2S = 8'b01110111, NOR2S  = 8'b00010001, NXOR2S = 8'b10011001,
  SUMN2S = 8'b10011001,
  AND12S  = 8'b10000000, OR12S  = 8'b11111110, XOR12S  = 8'b01111110,
  NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
  SUM12S  = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S;

  // Note: The PE only does EITHER an ALU OR a carry operation in one
  // instruction cycle. A carry word is computed using registers IN_1 and/or
  // IN_2 and a carry-in bit, and a carry operation is implied by specifying
  // the registers to use. The carry word is made available at the input
  // from the router, r_in, and the carry-out is placed in the carry flag of
  // the p_flags register.

  // car_in1: use "IN_1" in carry word computation; T or F
  // car_in2: use "IN_2" in carry word computation; T or F
  // car_nin1: use inverse of "IN_1" (if use was specified); T or F
  // car_nin2: use inverse of "IN_2" (if use was specified); T or F
  // car_srce: use value for carry-in (or else use carry flag); T or F
  // car_val: use 1 for value of carry-in (if use was specified); T or F

  // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
  // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

parameter // bit positions of PE flags:
  // Note that flags other than carry are not affected by a carry operation,
  // and the carry flag is not affected by an ALU operation.
  REG_MSB_F = 9, // m.s.b. of destination register
  REG_ZER_F = 8, // if destination register = 0
  OUT_MSB_F = 7, // m.s.b. of register OUT
  OUT_ZER_F = 6, // if register OUT = 0
  IN2_MSB_F = 5, // m.s.b. of register IN_2
  IN2_ZER_F = 4, // if register IN_2 = 0
  IN1_MSB_F = 3, // m.s.b. of register IN_1
  IN1_ZER_F = 2, // if register IN_1 = 0
  DISABLE_F = 1, // state of PE disable bit
  CARRY_F   = 0; // carry-out of last carry operation

// convenience parameters and definitions for writing PE instructions:

parameter NS = 500/`MHZ; // when specifying clock timing

`define W `WORD_WIDTH'h // for specifying memory values (in hex)
`define M `ADDR_WIDTH'h // for specifying memory address (in hex)

parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN_1"
parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN_2"
parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory
parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN_1"
parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN_2"
parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus

`define SH 6'd // for specifying shift (in decimal)

parameter NSH = `SH 0; // when not shifting (not using east/west srce regs)

`define I 8'b // for specifying ALU operation

parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops
  // (note that flags are affected and OUT is cleared)

parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations
parameter CM1  = { NALU, T, F, T, F, T, T }; // for carry to negate "IN_1"
parameter CM2  = { NALU, F, T, F, T, T, T }; // for carry to negate "IN_2"
parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN_1" + "IN_2"
parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN_1" - "IN_2"
parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN_2" - "IN_1"
parameter CI1  = { NALU, T, F, F, F, T, T }; // for carry to increment "IN_1"
parameter CI2  = { NALU, F, T, F, F, T, T }; // for carry to increment "IN_2"

parameter NDIS = { F, F, F, F }; // when not allowing any disabling
parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles
  // (note that flags are affected and OUT is cleared)

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!

// memory address definitions
parameter
  A_OP1 = `M 0, // address of first operand
  A_OP2 = `M 1, // address of second operand
  A_RES = `M 2; // address of result

initial
begin
  form_feed;
  // Print the configured word width instead of a hard-coded "32"
  // (same output for the default `WORD_WIDTH of 32).
  $display("Addition/Subtraction, Integer, Full, %0d-Bit, 1 PE w/C&VS", `WORD_WIDTH);
  clock = 0; clock_ct = 0;
  dump_r_0 = 0; dump_m_0 = 0;

  subtract = 1; // 1 = compute OP1 - OP2, 0 = compute OP1 + OP2
  pe_0.mem[A_OP1] = `W 00000003;
  pe_0.mem[A_OP2] = `W 00000004;

  reset_regs;

  // mem[A_OP1] -> in_1, then mem[A_OP2] -> in_2
  do({ A_OP1, MIN1, NALU, NCAR, NDIS });
  do({ A_OP2, MIN2, NALU, NCAR, NDIS });

  // carry cycle: carry word for in_1 - in_2 or in_1 + in_2
  if (subtract) do({ NMEM, C1M2, NDIS });
  else          do({ NMEM, C1P2, NDIS });

  // ALU cycle: out = sum of in_1, (inverted) in_2 and the carry word,
  // which is read through the router source register (RR)
  if (subtract) do({ NMEM, RR, NSH, SRC, RR, SUM1N2S, NCAR, NDIS });
  else          do({ NMEM, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS });

  // save the sign of the result before the next NALU cycle clears OUT
  savebit = flags_0[OUT_MSB_F];

  // out -> mem[A_RES]
  do({ A_RES, OMEM, NALU, NCAR, NDIS });

  dump;
  if (subtract)
    $display("PE_0 unsigned: %0d - %0d = %0d, carry = %0d, msb = %0d",
             pe_0.mem[A_OP1], pe_0.mem[A_OP2], pe_0.mem[A_RES],
             flags_0[CARRY_F], savebit);
  else
    $display("PE_0 unsigned: %0d + %0d = %0d, carry = %0d, msb = %0d",
             pe_0.mem[A_OP1], pe_0.mem[A_OP2], pe_0.mem[A_RES],
             flags_0[CARRY_F], savebit);
  $display("%0d clock cycles", clock_ct);
end

// Pulse the PE reset line high for one half-period.
task reset_regs;
begin
  #NS reset = 1;
  #NS reset = 0;
end
endtask

// Present instruction i_0 to the PE, give it one clock pulse,
// and count the cycle.
task do;
input [`ADDR_WIDTH+44-1:0] i_0;
begin
  instr_0 = i_0;
  #NS clock = 1;
  #NS clock = 0;
  clock_ct = clock_ct + 1;
end
endtask

// Pulse the PE's register-dump clock, then its memory-dump clock.
task dump;
begin
  #NS dump_r_0 = 1;
  #NS dump_r_0 = 0;
  #NS dump_m_0 = 1;
  #NS dump_m_0 = 0;
end
endtask

// Write an ASCII form feed (octal 14) to the simulation output.
task form_feed;
  $write("\14");
endtask

endmodule

Host command: verilog Command arguments: pe.v pc12-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:15:08 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc.
* Compiling source file "pe.v" Compiling source file "pc12-3.v" Highest level modules: pe0 pe pe2 pe4 pc12 Addition/Subtraction, Integer, Full, 32-Bit, 1 PE w/C&VS PE "PE_0" Reset, Clock Cycle # 0 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 5 PE_INSTR: mb_addr 0010 mb_srce mb_d_mem mb_d_in2 mb_d_in1 0 1 0 0 srce_reg shift reg_res dest_reg out_res 0000 000000 10101010 0000 00000000 car_in1 car_in2 car_nin1 car_nin2 car_srce car_val 0 0 0 0 0 0 alu_dis alu_dis_i mb_dis mb_dis_i (p) 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 1 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 00000000000000000000000000000011 (r) IN2: 00000000000000000000000000000100 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000000000000 (r) R3: 00000000000000000000000000000000 (r) R4: 00000000000000000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000111 (r) R_IN: 00000000000000000000000000000000 (p/r) Note reading from R_IN is really reading from ROUTER! 
NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000000000000 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 00000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 5 0: 00000003 00000004 ffffffff xxxxxxxx 4: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx PE_0 unsigned: 3 - 4 = 4294967295, carry = 0, msb = 1 5 clock cycles 37 warnings 897 simulation events CPU time: 1.3 secs to compile + 0.4 secs to link + 0.2 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:15:10 module pc13; // pc13-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 32-bit floating-point add/subtract with 1 PE // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 32 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, subtract, sign_op1, sign_op2, zero_op1, zero_op2, sign_res, done, manthad1, e_in_0, w_in_0, op2_smaller; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0; wire [9:0] flags_0; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, w_in_0, 0, , , , , , 0,,,,, dump_r_0, dump_m_0); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... // 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN_1 and/or // IN/2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. 
// car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation 
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter CD1 = { NALU, T, T, F, T, T, F }; // for carry to dec IN1 if IN2=0 parameter CD2 = { NALU, T, T, T, F, T, F }; // for carry to dec IN2 if IN1=0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! 
// 32-bit floating-point representation: // bits 0-22: 23 lower-order bits of 24-bit normalized unsigned mantissa // bits 23-30: 8-bit biased exponent (bias = $7F) // bit 31: 1-bit sign of mantissa // special floating-point values: // all 1s : positive maximum (not infinity) // all 1s except sign : negative maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point addition program: // 1. Load and Decode // 2. Align // 3. Add or Subtract // 4. Complement // 5. Normalize // 6. Encode // register assignments: // R0 = operand 1 exponent R1 = operand 1 mantissa // R2 = operand 2 exponent R3 = operand 2 mantissa // R4 = result exponent R5 = result mantissa // R6 = AND-mask for isolating/testing right-justified exponent // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5; // address of AND-mask for isolating/testing // right-justified exponent initial begin form_feed; $display("Addition/Subtraction, Floating Point, 32-Bit, 1 PE"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; pe_0.mem[A_ANDMAN] = `W 007fffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 00800000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 000000ff; // exponent AND-mask pe_0.mem[A_OP1] = { 1'b 0, 8'h 80, 23'b 10000000000000000000000 }; // 3 pe_0.mem[A_OP2] = { 1'b 0, 8'h 81, 23'b 11000000000000000000000 }; // 7 subtract = 1; reset_regs; //// 1. Load and Decode //// Description: //// Load operands and decode signs, exponents, and mantissas. Handle //// special case zero. 
//// Actions: //// sign(OP1) = msb(OP1) //// if OP1 == 0 then OP1_MAN = 0 //// else OP1 <> 0 so OP1_MAN = (OP1 & ANDMAN) | ORMAN //// msb(OP2) == sign(OP2) //// if OP2 == 0 then OP2_MAN = 0 //// else OP2 <> 0 so OP2_MAN = (OP2 & ANDMAN) | ORMAN //// OP1_EXP = OP1 >> 23 & ANDEXP //// OP2_EXP = OP2 >> 23 & ANDEXP //// Output State: //// sign(OP1) -> sign_op1 //// OP1_MAN -> R1 //// sign(OP2) -> sign_op2 //// OP2_MAN -> R3 //// OP1_EXP -> R0, IN1 //// OP2_EXP -> R2, IN2 //// ANDEXP -> R6 //// // OP1 -> IN1 // msb(OP1) -> sign_op1 // zero(OP1) -> zero_op1 do({ A_OP1, MIN1, NALU, NCAR, NDIS }); sign_op1 = flags_0[IN1_MSB_F]; zero_op1 = flags_0[IN1_ZER_F]; // IN1 -> R4 (OP1) // OP2 -> IN2 // msb(OP2) -> sign_op2 // zero(OP2) -> zero_op2 do({ A_OP2, MIN2, R0, NSH, IN1, R4, ZEROS, NCAR, NDIS }); sign_op2 = flags_0[IN2_MSB_F]; zero_op2 = flags_0[IN2_ZER_F]; // IN2 -> R5 (OP2) // ORMAN -> IN2 do({ A_ORMAN, MIN2, R0, NSH, IN2, R5, ZEROS, NCAR, NDIS }); // IN1 -> RW (OP1) // ANDMAN -> IN1 do({ A_ANDMAN, MIN1, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }); if (zero_op1) // 0 -> R1 (OP1_MAN = 0) do({ NMEM, R1, NSH, ZEROS, R1, ZEROS, NCAR, NDIS }); else // (R4 & IN1) | IN2 -> R1 (OP1_MAN = (OP1 & ANDMAN) | ORMAN) do({ NMEM, R4, NSH, `I 11101100, R1, ZEROS, NCAR, NDIS }); if (zero_op2) // 0 -> R3 (OP2_MAN = 0) // ANDEXP -> IN2 do({ A_ANDEXP, MIN2, R3, NSH, ZEROS, R3, ZEROS, NCAR, NDIS }); else // (R5 & IN1) | IN2 -> R3 (OP2_MAN = (OP2 & ANDMAN) | ORMAN) // ANDEXP -> IN2 do({ A_ANDEXP, MIN2, R5, NSH, `I 11101100, R3, ZEROS, NCAR, NDIS }); // (RW >> 23) & IN2 -> R0, OUT (OP1_EXP = OP1 >> 23 & ANDEXP) w_in_0 = 0; do({ NMEM, RW, `SH 23, AND2S, R0, AND2S, NCAR, NDIS }); // R5 -> RW (OP2) // OUT -> IN1 (OP1_EXP) do({ OIN1, R5, NSH, SRC, RW, ZEROS, NCAR, NDIS }); // (RW >> 23) & IN2 -> R2, OUT (OP2_EXP = OP2 >> 23 & ANDEXP) do({ NMEM, RW, `SH 23, AND2S, R2, AND2S, NCAR, NDIS }); // IN2 -> R6 (ANDEXP) // OUT -> IN2 (OP2_EXP) do({ OIN2, R0, NSH, IN2, R6, ZEROS, NCAR, NDIS }); $display("1. 
Load and Decode: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Align //// Description: //// Determine which exponent is smaller. //// For a number of times equal to the maximum number of bits in a //// mantissa including the leading 1, //// increment the smaller exponent, and //// shift the mantissa of the operand with the smaller exponent //// right by one (truncate). //// Use the larger exponent for the result. //// Actions: //// if OP1_EXP - OP2_EXP >= 0 then OP2_EXP <= OP1_EXP //// else OP1_EXP < OP2_EXP //// repeat 24: //// if OP1_EXP == OP2_EXP then do nothing //// else if OP2_EXP < OP1_EXP then //// OP2_EXP = OP2_EXP + 1 //// OP2_MAN = OP2_MAN >> 1 //// else OP1_EXP < OP2_EXP so //// OP1_EXP = OP1_EXP + 1 //// OP1_MAN = OP1_MAN >> 1 //// (end repeat) //// if OP2_EXP <= OP1_EXP then RES_EXP = OP1_EXP //// else OP1_EXP < OP2_EXP so RES_EXP = OP2_EXP //// Output state: //// sign(OP1) -> sign_op1 //// OP1_MAN -> R1 //// sign(OP2) -> sign_op2 //// OP2_MAN -> R3 //// OP1_EXP -> R0, IN1 //// OP2_EXP -> R2, IN2 //// RES_EXP -> OUT //// ANDEXP -> R6 //// // carry_word(IN1 (OP1_EXP) - IN2 (OP2_EXP)) -> RR do({ NMEM, C1M2, NDIS }); // IN1 - IN2 + RR -> OUT (EXP_DIFF = OP1_EXP - OP2_EXP) // carry_out(EXP_DIFF) -> op2_smaller do({ NMEM, RR, NSH, SRC, RR, SUM1N2S, NCAR, NDIS }); op2_smaller = flags_0[CARRY_F]; if (op2_smaller) // OP2_EXP <= OP1_EXP // R3 -> RW (OP2_MAN) do({ NMEM, R3, NSH, SRC, RW, ZEROS, NCAR, NDIS }); else // OP1_EXP < OP2_EXP // R1 -> RW (OP1_MAN) do({ NMEM, R1, NSH, SRC, RW, ZEROS, NCAR, NDIS }); repeat (24) begin // IN1 XOR IN2 -> OUT (EXP_DIFF = OP1_EXP XOR OP2_EXP) do({ NMEM, R0, NSH, SRC, R0, XOR12, NCAR, NDIS }); if (flags_0[OUT_ZER_F]) // EXP_DIFF = 0, so OP1_EXP = OP2_EXP begin // squander clock cycle(s) for equalization do(NOOP); do(NOOP); do(NOOP); end else // EXP_DIFF <> 0...
if (op2_smaller) // OP2_EXP < OP1_EXP begin // carry_word(IN2 (OP2_EXP) + 1) -> RR do({ NMEM, CI2, NDIS }); // IN2 + 1 + RR -> R2, OUT (OP2_EXP += 1) do({ NMEM, RR, NSH, SUM2S, R2, SUM2S, NCAR, NDIS }); // OUT -> IN2 (OP2_EXP) // RW >> 1 -> RW (OP2_MAN >> 1) do({ OIN2, RW, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }); end else // OP1_EXP < OP2_EXP begin // carry_word(IN1 (OP1_EXP) + 1) -> RR do({ NMEM, CI1, NDIS }); // IN1 + 1 + RR -> R0, OUT (OP1_EXP += 1) do({ NMEM, RR, NSH, SUM1S, R0, SUM1S, NCAR, NDIS }); // OUT -> IN1 (OP1_EXP) // RW >> 1 -> RW (OP1_MAN >> 1) do({ OIN1, RW, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }); end end // of repeat (24) if (op2_smaller) // OP2_EXP <= OP1_EXP // RW -> R3 (OP2_MAN) // IN1 -> OUT (RES_EXP = OP1_EXP) do({ NMEM, RW, NSH, SRC, R3, IN1, NCAR, NDIS }); else // OP1_EXP < OP2_EXP // RW -> R1 (OP1_MAN) // IN2 -> OUT (RES_EXP = OP2_EXP) do({ NMEM, RW, NSH, SRC, R1, IN2, NCAR, NDIS }); $display("2. Align: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Add or Subtract //// Description: //// Depending on the signs of the mantissas and the desired operation, //// add or subtract, and determine the sign. //// sign_op1 | sign_op2 | desired || do | sign_res | case //// ----------+-----------+---------++---------+-----------+----- //// + | + | + || 1 + 2 | + | A //// + | + | - || 1 - 2 | ? | B //// + | - | + || 1 - 2 | ? | B //// + | - | _ || 1 + 2 | + | A //// - | + | + || 2 - 1 | ? | C //// - | + | - || 1 + 2 | - | D //// - | - | + || 1 + 2 | - | D //// - | - | - || 2 - 1 | ? 
| C //// Actions: //// RES_MAN = +/- OP1_MAN +/- OP2_MAN //// sign(RES) = msb(RES_MAN)/+/- //// Output State: //// RES_MAN -> OUT, RE //// RES_EXP -> R4 //// sign(RES) -> sign_res //// ANDEXP -> R6 //// // OUT -> IN1 (RES_EXP) // R1 -> OUT (OP1_MAN) do({ OIN1, R1, NSH, SRC, R1, SRC, NCAR, NDIS }); // IN1 -> R4 (RES_EXP) // OUT -> IN1 (OP1_MAN) // R3 -> OUT (OP2_MAN) do({ OIN1, R3, NSH, IN1, R4, SRC, NCAR, NDIS }); // OUT -> IN2 (OP2_MAN) do({ OIN2, NALU, NCAR, NDIS }); if ( !sign_op1 && !sign_op2 && !subtract || !sign_op1 && sign_op2 && subtract ) // case A begin // carry_word(IN1 (OP1_MAN) + IN2 (OP2_MAN)) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN + OP2_MAN) // positive -> sign(RES) do({ NMEM, RR, NSH, SUM12S, RE, SUM12S, NCAR, NDIS }); sign_res = 0; end else if ( !sign_op1 && !sign_op2 && subtract || !sign_op1 && sign_op2 && !subtract ) // case B begin // carry_word(IN1 (OP1_MAN) - IN2 (OP2_MAN)) -> RR do({ NMEM, C1M2, NDIS }); // IN1 - IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN - OP2_MAN) // sign(OP1_MAN - OP2_MAN) -> sign(RES) do({ NMEM, RR, NSH, SUM1N2S, RE, SUM1N2S, NCAR, NDIS }); sign_res = flags_0[OUT_MSB_F]; end else if ( sign_op1 && !sign_op2 && !subtract || sign_op1 && sign_op2 && subtract ) // case C begin // carry_word(IN2 (OP2_MAN) - IN1 (OP1_MAN)) -> RR do({ NMEM, C2M1, NDIS }); // IN2 - IN1 + RR -> OUT, RE (RES_MAN = OP2_MAN - OP1_MAN) // sign(OP2_MAN - OP1_MAN) -> sign(RES) do({ NMEM, RR, NSH, SUM2N1S, RE, SUM2N1S, NCAR, NDIS }); sign_res = flags_0[OUT_MSB_F]; end else if ( sign_op1 && !sign_op2 && subtract || sign_op1 && sign_op2 && !subtract ) // case D begin // carry_word(IN1 (OP1_MAN) + IN2 (OP2_MAN)) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN + OP2_MAN) // negative -> sign(RES) do({ NMEM, RR, NSH, SUM12S, RE, SUM12S, NCAR, NDIS }); sign_res = 1; end $display("3. Add or Subtract: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 4. 
//// Complement
//// Description:
////   If the result mantissa is negative in 2s-complement, change it.
//// Actions:
////   if msb(RES_MAN) == 1 then RES_MAN < 0 so RES_MAN = - RES_MAN
//// Output State:
////   RES_MAN       -> RE
////   RES_EXP       -> R4, IN1
////   sign(RES_MAN) -> sign_res
////   ANDEXP        -> R6
////   zero          -> OUT
////

// Both branches issue exactly three instructions, so this phase costs the
// same number of clock cycles whichever way the sign test goes.
//
// Every instruction carries exactly ONE memory-bus field.  NMEM, OIN1 and
// OIN2 are already complete fields (address plus the four bus-control bits).
// Do not append MIN2 after NMEM: that makes the concatenation 4 bits wider
// than the argument of task "do", the address bits are truncated away, and
// the instruction turns into an undocumented memory[0] -> IN2 transfer.
if (flags_0[OUT_MSB_F]) // RES_MAN < 0
  begin
    // OUT -> IN2 (RES_MAN)
    // R4 -> OUT (RES_EXP)
    do({ OIN2, R4, NSH, SRC, R4, SRC, NCAR, NDIS });

    // carry_word( -IN2 (RES_MAN) ) -> RR
    // OUT -> IN1 (RES_EXP)
    do({ OIN1, CM2, NDIS });

    // -IN2 + RR -> RE (RES_MAN = -RES_MAN)
    // 0 -> OUT
    // (no memory-bus transfer in this cycle)
    do({ NMEM, RR, NSH, SUMN2S, RE, ZEROS, NCAR, NDIS });
  end
else // RES_MAN >= 0
  begin
    // squander clock cycle(s) for equalization
    do( NOOP );

    // R4 -> OUT (RES_EXP)
    // (no memory-bus transfer in this cycle)
    do({ NMEM, R4, NSH, SRC, R4, SRC, NCAR, NDIS });

    // OUT -> IN1 (RES_EXP)
    // 0 -> OUT
    do({ OIN1, NALU, NCAR, NDIS });
  end

$display("4. Complement: %d cycles", clock_ct - old_clock_ct);
old_clock_ct = clock_ct;

//// 5. Normalize
//// Description:
////   The maximum number of bits for the result mantissa is one more
////   than for the operand mantissa, so increment the result exponent
////   to prepare for starting normalization from the additional bit.
////   For a number of times equal to the maximum number of bits in the
////   result mantissa including the leading 1 while normalization is
////   not complete,
////     if the most significant bit of the result mantissa is not 1,
////       decrement the result exponent;
////     shift the result mantissa left 1 bit w/0.
//// Actions:
////   RES_EXP = RES_EXP + 1
////   RES_MAN = RES_MAN << 7
////   clear done
////   repeat 25:
////     if done == 1 then do nothing
////     else done == 0 so
////       if msb(RES_MAN) == 1 then
////         RES_MAN = RES_MAN << 1
////         done = 1
////       else msb(RES_MAN) == 0 so
////         RES_EXP = RES_EXP - 1
////         RES_MAN = RES_MAN << 1
//// Output State:
////   RES_MAN       -> RE
////   RES_EXP       -> R4, IN1
////   sign(RES_MAN) -> sign_res
////   msb(RES_MAN)  -> manthad1
////   ANDEXP        -> R6
////

// "done" is a testbench-only flag (it is not wired to the PE), so it is
// cleared up front.
done = 0;

// carry_word( IN1 (RES_EXP) + 1 ) -> RR
do({ NMEM, CI1, NDIS });

// memory bus: OUT -> IN2 (0)
// ALU:        IN1 + 1 + RR -> R4, OUT (RES_EXP += 1)
do({ OIN2, RR, NSH, SUM1S, R4, SUM1S, NCAR, NDIS });

// memory bus: OUT -> IN1 (RES_EXP)
// ALU:        RE << 7 -> RE (RES_MAN); e_in_0 is cleared before the shift
e_in_0 = 0;
do({ OIN1, RE, `SH 7, SRC, RE, ZEROS, NCAR, NDIS });

// Every pass of the loop issues exactly three instructions, whichever arm
// is taken, so the phase length does not depend on the data.
repeat (25)
  begin
    case (done)
      1'b1: // already normalized: squander three cycles for synchronization
        repeat (3) do(NOOP);

      default: // not done normalizing
        begin
          // msb of the register written by the previous shift
          manthad1 = flags_0[REG_MSB_F];
          if (manthad1) // msb of last shift = 1
            begin
              // squander two cycles for synchronization
              repeat (2) do(NOOP);
              // RE << 1 -> RE (RES_MAN)
              do({ NMEM, RE, `SH 1, SRC, RE, ZEROS, NCAR, NDIS });
              // 1 -> done
              done = 1;
            end
          else // msb of last shift = 0
            begin
              // carry_word( IN1 (RES_EXP) + ~IN2 (~0) ) -> RR
              do({ NMEM, CD1, NDIS });
              // IN1 + ~IN2 (~0) + RR -> R4, OUT (RES_EXP -= 1)
              do({ NMEM, RR, NSH, SUM1N2S, R4, SUM1N2S, NCAR, NDIS });
              // memory bus: OUT -> IN1 (RES_EXP)
              // ALU:        RE << 1 -> RE (RES_MAN)
              do({ OIN1, RE, `SH 1, SRC, RE, ZEROS, NCAR, NDIS });
            end
        end
    endcase
  end

$display("5. Normalize: %d cycles", clock_ct - old_clock_ct);
old_clock_ct = clock_ct;

//// 6. Encode
//// Description:
////   Encode the mantissa, exponent, and sign into a floating-point
////   word. Handle special cases zero, overflow, and underflow.
//// Actions: //// if true_msb(RES_MAN) == 0 || RES_EXP < 0 then RESULT = 0 //// else RESULT <> 0 so //// if RES_EXP with maximum allowed bits masked off == 0 //// then 0 <= |RESULT| <= max so //// |RESULT| << 1 = (RES_MAN >> 8) | (RES_EXP << 24) //// else |RESULT| > maximum allowed bits so //// |RESULT| << 1 = maximum value //// RESULT = |RESULT| >> 1 w/sign //// store RESULT //// if ( ~manthad1 | flags_0[IN1_MSB_F] ) // RESULT = 0 begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); // 0 -> OUT, R6 (RESULT = 0) do({ NMEM, R6, NSH, ZEROS, R6, ZEROS, NCAR, NDIS }); end else // RESULT <> 0 begin // R6 -> OUT (ANDEXP) do({ NMEM, R6, NSH, SRC, R6, SRC, NCAR, NDIS }); // OUT -> IN2 (ANDEXP) // RE -> RW (RES_MAN) do({ OIN2, RE, NSH, SRC, RW, ZEROS, NCAR, NDIS }); // IN1 inverse_masked_by IN2 -> OUT (excess RES_EXP) // IN1 -> RE (RES_EXP) do({ NMEM, R0, NSH, IN1, RE, `I 00110000, NCAR, NDIS }); if ( flags_0[OUT_ZER_F] ) // 0 <= |RESULT| <= max begin // RW >> 8 -> OUT (RES_MAN) do({ NMEM, RW, `SH 8, ZEROS, R9, SRC, NCAR, NDIS }); // OUT -> IN2 (RES_MAN) // RE << 24 -> R4 (RES_EXP) do({ OIN2, RE, `SH 24, SRC, R4, ZEROS, NCAR, NDIS }); // IN2 | R4 -> RW (RESULT = RES_MAN | RES_EXP) do({ NMEM, R4, NSH, OR2S, RW, ZEROS, NCAR, NDIS }); end else // |RESULT| > max begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); // ones -> RW (RESULT = limit) do({ NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }); end // sign_res -> w_in_0 // RW >> 1 -> OUT, R6 (RESULT >>= 1 w/sign) w_in_0 = sign_res; do({ NMEM, RW, `SH 1, SRC, R6, SRC, NCAR, NDIS }); end // of else RESULT <> 0 // OUT -> RES (RESULT) do({ A_RES, OMEM, NALU, NCAR, NDIS }); $display("6. 
Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); $display; word = pe_0.mem[A_OP1]; $display(" OP1: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_OP2]; if (subtract) $write("- "); else $write("+ "); $display("OP2: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_RES]; $display("= RES: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); $display; dump; end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task do; input [`ADDR_WIDTH+44-1:0] i_0; begin instr_0 = i_0; #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc13-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:15:16 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc13-3.v" Warning! Port sizes differ in port connection (port 14) [Verilog-PCDPC] "pc13-3.v", 28: 0 Highest level modules: pe0 pe pe2 pe4 pc13 Addition/Subtraction, Floating Point, 32-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 1. Load and Decode: 10 cycles 2. Align: 100 cycles 3. Add or Subtract: 5 cycles 4. Complement: 3 cycles 5. Normalize: 78 cycles 6. 
Encode: 8 cycles Total: 204 cycles OP1: sign 0, exponent 10000000 ($80), mantissa 10000000000000000000000 - OP2: sign 0, exponent 10000001 ($81), mantissa 11000000000000000000000 = RES: sign 1, exponent 10000001 ($81), mantissa 00000000000000000000000 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 204 PE_INSTR: mb_addr 0010 mb_srce mb_d_mem mb_d_in2 mb_d_in1 0 1 0 0 srce_reg shift reg_res dest_reg out_res 0000 000000 10101010 0000 00000000 car_in1 car_in2 car_nin1 car_nin2 car_srce car_val 0 0 0 0 0 0 alu_dis alu_dis_i mb_dis mb_dis_i (p) 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 0 0 1 0 1 0 0 disab carry (r) 0 1 IN1: 00000000000000000000000010000001 (r) IN2: 00000000000000000000000000000000 (r) OUT: 00000000000000000000000000000000 (r) R0: 00000000000000000000000010000001 (r) R1: 00000000011000000000000000000000 (r) R2: 00000000000000000000000010000001 (r) R3: 00000000111000000000000000000000 (r) R4: 10000001000000000000000000000000 (r) R5: 01000000111000000000000000000000 (r) R6: 11000000100000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 11111111111111111111111111111100 (r) R_IN: 00000000000000000000000000000000 (p/r) Note reading from R_IN is really reading from ROUTER! 
NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000010000001 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 10000001000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 204 0: 40400000 40e00000 c0800000 007fffff 4: 00800000 000000ff xxxxxxxx xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 35 warnings 28795 simulation events CPU time: 1.4 secs to compile + 0.5 secs to link + 4.5 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:15:22 module pc14; // pc14-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 64-bit floating-point add/subtract with 1 PE // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 64 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, subtract, sign_op1, sign_op2, zero_op1, zero_op2, sign_res, done, manthad1, e_in_0, w_in_0, op2_smaller; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0; wire [9:0] flags_0; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, w_in_0, 0, , , , , , 0,,,,, dump_r_0, dump_m_0); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... // 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN_1 and/or // IN/2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. 
// car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation 
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter CD1 = { NALU, T, T, F, T, T, F }; // for carry to dec IN1 if IN2=0 parameter CD2 = { NALU, T, T, T, F, T, F }; // for carry to dec IN2 if IN1=0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! 
// 64-bit floating-point representation: // bits 0-51: 52 lower-order bits of 53-bit normalized unsigned mantissa // bits 52-62: 11-bit biased exponent (bias = $3ff) // bit 63: 1-bit sign of mantissa // special floating-point values: // all 1s : positive maximum (not infinity) // all 1s except sign : negative maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point addition program: // 1. Load and Decode // 2. Align // 3. Add or Subtract // 4. Complement // 5. Normalize // 6. Encode // register assignments: // R0 = operand 1 exponent R1 = operand 1 mantissa // R2 = operand 2 exponent R3 = operand 2 mantissa // R4 = result exponent R5 = result mantissa // R6 = AND-mask for isolating/testing right-justified exponent // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5; // address of AND-mask for isolating/testing // right-justified exponent initial begin form_feed; $display("Addition/Subtraction, Floating Point, 64-Bit, 1 PE"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; // ++++----++++---- pe_0.mem[A_ANDMAN] = `W 000fffffffffffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 0010000000000000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 00000000000007ff; // exponent AND-mask // +++----++++ pe_0.mem[A_OP1] = { 1'b 0, 11'b 10000000000, 52'b 1000000000000000000000000000000000000000000000000000 }; // 3 // ++++----++++----++++----++++----++++----++++----++++ pe_0.mem[A_OP2] = { 1'b 0, 11'b 10000000001, 52'b 1100000000000000000000000000000000000000000000000000 }; // 7 subtract = 1; reset_regs; //// 1. Load and Decode //// Description: //// Load operands and decode signs, exponents, and mantissas. Handle //// special case zero.
//// Actions: //// sign(OP1) = msb(OP1) //// if OP1 == 0 then OP1_MAN = 0 //// else OP1 <> 0 so OP1_MAN = (OP1 & ANDMAN) | ORMAN //// msb(OP2) == sign(OP2) //// if OP2 == 0 then OP2_MAN = 0 //// else OP2 <> 0 so OP2_MAN = (OP2 & ANDMAN) | ORMAN //// OP1_EXP = OP1 >> 52 & ANDEXP //// OP2_EXP = OP2 >> 52 & ANDEXP //// Output State: //// sign(OP1) -> sign_op1 //// OP1_MAN -> R1 //// sign(OP2) -> sign_op2 //// OP2_MAN -> R3 //// OP1_EXP -> R0, IN1 //// OP2_EXP -> R2, IN2 //// ANDEXP -> R6 //// // OP1 -> IN1 // msb(OP1) -> sign_op1 // zero(OP1) -> zero_op1 do({ A_OP1, MIN1, NALU, NCAR, NDIS }); sign_op1 = flags_0[IN1_MSB_F]; zero_op1 = flags_0[IN1_ZER_F]; // IN1 -> R4 (OP1) // OP2 -> IN2 // msb(OP2) -> sign_op2 // zero(OP2) -> zero_op2 do({ A_OP2, MIN2, R0, NSH, IN1, R4, ZEROS, NCAR, NDIS }); sign_op2 = flags_0[IN2_MSB_F]; zero_op2 = flags_0[IN2_ZER_F]; // IN2 -> R5 (OP2) // ORMAN -> IN2 do({ A_ORMAN, MIN2, R0, NSH, IN2, R5, ZEROS, NCAR, NDIS }); // IN1 -> RW (OP1) // ANDMAN -> IN1 do({ A_ANDMAN, MIN1, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }); if (zero_op1) // 0 -> R1 (OP1_MAN = 0) do({ NMEM, R1, NSH, ZEROS, R1, ZEROS, NCAR, NDIS }); else // (R4 & IN1) | IN2 -> R1 (OP1_MAN = (OP1 & ANDMAN) | ORMAN) do({ NMEM, R4, NSH, `I 11101100, R1, ZEROS, NCAR, NDIS }); if (zero_op2) // 0 -> R3 (OP2_MAN = 0) // ANDEXP -> IN2 do({ A_ANDEXP, MIN2, R3, NSH, ZEROS, R3, ZEROS, NCAR, NDIS }); else // (R5 & IN1) | IN2 -> R3 (OP2_MAN = (OP2 & ANDMAN) | ORMAN) // ANDEXP -> IN2 do({ A_ANDEXP, MIN2, R5, NSH, `I 11101100, R3, ZEROS, NCAR, NDIS }); // (RW >> 52) & IN2 -> R0, OUT (OP1_EXP = OP1 >> 52 & ANDEXP) w_in_0 = 0; do({ NMEM, RW, `SH 52, AND2S, R0, AND2S, NCAR, NDIS }); // R5 -> RW (OP2) // OUT -> IN1 (OP1_EXP) do({ OIN1, R5, NSH, SRC, RW, ZEROS, NCAR, NDIS }); // (RW >> 52) & IN2 -> R2, OUT (OP2_EXP = OP2 >> 52 & ANDEXP) do({ NMEM, RW, `SH 52, AND2S, R2, AND2S, NCAR, NDIS }); // IN2 -> R6 (ANDEXP) // OUT -> IN2 (OP2_EXP) do({ OIN2, R0, NSH, IN2, R6, ZEROS, NCAR, NDIS }); $display("1. 
Load and Decode: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Align //// Description: //// Determine which exponent is smaller. //// For a number of times equal to the maximum number of bits in a //// mantissa including the leading 1, //// increment the smaller exponent, and //// shift the mantissa of the operand with the smaller exponent //// right by one (truncate). //// Use the larger exponent for the result. //// Actions: //// if OP1_EXP - OP2_EXP < 0 then OP2_EXP < OP1_EXP //// else OP1_EXP <= OP2_EXP //// repeat 53: //// if OP1_EXP == OP2_EXP then do nothing //// else if OP2_EXP < OP1_EXP then //// OP2_EXP = OP2_EXP + 1 //// OP2_MAN = OP2_MAN >> 1 //// else OP1_EXP < OP2_EXP so //// OP1_EXP = OP1_EXP + 1 //// OP1_MAN = OP1_MAN >> 1 //// (end repeat) //// if OP2_EXP <= OP1_EXP then RES_EXP = OP1_EXP //// else OP1_EXP < OP2_EXP so RES_EXP = OP2_EXP //// Output state: //// sign(OP1) -> sign_op1 //// OP1_MAN -> R1 //// sign(OP2) -> sign_op2 //// OP2_MAN -> R3 //// OP1_EXP -> R0, IN1 //// OP2_EXP -> R2, IN2 //// RES_EXP -> OUT //// ANDEXP -> R6 //// // carry_word(IN1 (OP1_EXP) - IN2 (OP2_EXP)) -> RR do({ NMEM, C1M2, NDIS }); // IN1 - IN2 + RR -> OUT (EXP_DIFF = OP1_EXP - OP2_EXP) // carry_out(EXP_DIFF) -> op2_smaller do({ NMEM, RR, NSH, SRC, RR, SUM1N2S, NCAR, NDIS }); op2_smaller = flags_0[CARRY_F]; if (op2_smaller) // OP2_EXP <= OP1_EXP // R3 -> RW (OP2_MAN) do({ NMEM, R3, NSH, SRC, RW, ZEROS, NCAR, NDIS }); else // OP1_EXP < OP2_EXP // R1 -> RW (OP1_MAN) do({ NMEM, R1, NSH, SRC, RW, ZEROS, NCAR, NDIS }); repeat (53) begin // IN1 XOR IN2 -> OUT (EXP_DIFF = OP1_EXP XOR OP2_EXP) do({ NMEM, R0, NSH, SRC, R0, XOR12, NCAR, NDIS }); if (flags_0[OUT_ZER_F]) // EXP_DIFF = 0, so OP1_EXP = OP2_EXP begin // squander clock cycle(s) for equalization do(NOOP); do(NOOP); do(NOOP); end else // EXP_DIFF <> 0... 
if (op2_smaller) // OP2_EXP < OP1_EXP begin // carry_word(IN2 (OP2_EXP) + 1) -> RR do({ NMEM, CI2, NDIS }); // IN2 + 1 + RR -> R2, OUT (OP2_EXP += 1) do({ NMEM, RR, NSH, SUM2S, R2, SUM2S, NCAR, NDIS }); // OUT -> IN2 (OP2_EXP) // RW >> 1 -> RW (OP2_MAN >> 1) do({ OIN2, RW, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }); end else // OP1_EXP < OP2_EXP begin // carry_word(IN1 (OP1_EXP) + 1) -> RR do({ NMEM, CI1, NDIS }); // IN1 + 1 + RR -> R0, OUT (OP1_EXP += 1) do({ NMEM, RR, NSH, SUM1S, R0, SUM1S, NCAR, NDIS }); // OUT -> IN1 (OP1_EXP) // RW >> 1 -> RW (OP1_MAN >> 1) do({ OIN1, RW, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }); end end // of repeat (53) if (op2_smaller) // OP2_EXP <= OP1_EXP // RW -> R3 (OP2_MAN) // IN1 -> OUT (RES_EXP = OP1_EXP) do({ NMEM, RW, NSH, SRC, R3, IN1, NCAR, NDIS }); else // OP1_EXP < OP2_EXP // RW -> R1 (OP1_MAN) // IN2 -> OUT (RES_EXP = OP2_EXP) do({ NMEM, RW, NSH, SRC, R1, IN2, NCAR, NDIS }); $display("2. Align: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Add or Subtract //// Description: //// Depending on the signs of the mantissas and the desired operation, //// add or subtract, and determine the sign. //// sign_op1 | sign_op2 | desired || do | sign_res | case //// ----------+-----------+---------++---------+-----------+----- //// + | + | + || 1 + 2 | + | A //// + | + | - || 1 - 2 | ? | B //// + | - | + || 1 - 2 | ? | B //// + | - | - || 1 + 2 | + | A //// - | + | + || 2 - 1 | ? | C //// - | + | - || 1 + 2 | - | D //// - | - | + || 1 + 2 | - | D //// - | - | - || 2 - 1 | ? 
| C //// Actions: //// RES_MAN = +/- OP1_MAN +/- OP2_MAN //// sign(RES) = msb(RES_MAN)/+/- //// Output State: //// RES_MAN -> OUT, RE //// RES_EXP -> R4 //// sign(RES) -> sign_res //// ANDEXP -> R6 //// // OUT -> IN1 (RES_EXP) // R1 -> OUT (OP1_MAN) do({ OIN1, R1, NSH, SRC, R1, SRC, NCAR, NDIS }); // IN1 -> R4 (RES_EXP) // OUT -> IN1 (OP1_MAN) // R3 -> OUT (OP2_MAN) do({ OIN1, R3, NSH, IN1, R4, SRC, NCAR, NDIS }); // OUT -> IN2 (OP2_MAN) do({ OIN2, NALU, NCAR, NDIS }); if ( !sign_op1 && !sign_op2 && !subtract || !sign_op1 && sign_op2 && subtract ) // case A begin // carry_word(IN1 (OP1_MAN) + IN2 (OP2_MAN)) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN + OP2_MAN) // positive -> sign(RES) do({ NMEM, RR, NSH, SUM12S, RE, SUM12S, NCAR, NDIS }); sign_res = 0; end else if ( !sign_op1 && !sign_op2 && subtract || !sign_op1 && sign_op2 && !subtract ) // case B begin // carry_word(IN1 (OP1_MAN) - IN2 (OP2_MAN)) -> RR do({ NMEM, C1M2, NDIS }); // IN1 - IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN - OP2_MAN) // sign(OP1_MAN - OP2_MAN) -> sign(RES) do({ NMEM, RR, NSH, SUM1N2S, RE, SUM1N2S, NCAR, NDIS }); sign_res = flags_0[OUT_MSB_F]; end else if ( sign_op1 && !sign_op2 && !subtract || sign_op1 && sign_op2 && subtract ) // case C begin // carry_word(IN2 (OP2_MAN) - IN1 (OP1_MAN)) -> RR do({ NMEM, C2M1, NDIS }); // IN2 - IN1 + RR -> OUT, RE (RES_MAN = OP2_MAN - OP1_MAN) // sign(OP2_MAN - OP1_MAN) -> sign(RES) do({ NMEM, RR, NSH, SUM2N1S, RE, SUM2N1S, NCAR, NDIS }); sign_res = flags_0[OUT_MSB_F]; end else if ( sign_op1 && !sign_op2 && subtract || sign_op1 && sign_op2 && !subtract ) // case D begin // carry_word(IN1 (OP1_MAN) + IN2 (OP2_MAN)) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> OUT, RE (RES_MAN = OP1_MAN + OP2_MAN) // negative -> sign(RES) do({ NMEM, RR, NSH, SUM12S, RE, SUM12S, NCAR, NDIS }); sign_res = 1; end $display("3. Add or Subtract: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 4. 
//// Complement (stage 4 of the floating-point addition program)
//// Description:
////   If the result mantissa is negative in 2s-complement, change it.
////   Both branches issue exactly three instructions, so the stage costs
////   the same number of clock cycles whichever branch is taken.
//// Actions:
////   if msb(RES_MAN) == 1 then RES_MAN < 0 so RES_MAN = - RES_MAN
//// Output State:
////   RES_MAN       -> RE
////   RES_EXP       -> R4, IN1
////   sign(RES_MAN) -> sign_res
////   ANDEXP        -> R6
////   zero          -> OUT
//// Note:
////   An instruction word carries exactly one memory-bus field.  NMEM
////   already fills that field (address plus the four control bits), so it
////   must not be combined with MIN1/MIN2/OMEM: the concatenation would be
////   4 bits wider than the input of task "do", the high-order bits would be
////   silently truncated, and the instruction would perform a memory
////   transfer into IN2 instead of "no memory operation".
////
if (flags_0[OUT_MSB_F]) // RES_MAN < 0
  begin
    // OUT -> IN2 (RES_MAN)
    // R4 -> OUT (RES_EXP)
    do({ OIN2, R4, NSH, SRC, R4, SRC, NCAR, NDIS });
    // carry_word ( -IN2 (RES_MAN) ) -> RR
    // OUT -> IN1 (RES_EXP)
    do({ OIN1, CM2, NDIS });
    // -IN2 + RR -> RE (RES_MAN = -RES_MAN)
    // 0 -> OUT
    // (no memory-bus transfer in this cycle)
    do({ NMEM, RR, NSH, SUMN2S, RE, ZEROS, NCAR, NDIS });
  end
else // RES_MAN >= 0
  begin
    // squander clock cycle(s) for equalization
    do( NOOP );
    // R4 -> OUT (RES_EXP)
    // (no memory-bus transfer in this cycle)
    do({ NMEM, R4, NSH, SRC, R4, SRC, NCAR, NDIS });
    // OUT -> IN1 (RES_EXP)
    // 0 -> OUT
    do({ OIN1, NALU, NCAR, NDIS });
  end
$display("4. Complement: %d cycles", clock_ct - old_clock_ct);
old_clock_ct = clock_ct;

//// 5. Normalize
//// Description:
////   The maximum number of bits for the result mantissa is one more
////   than for the operand mantissa, so increment the result exponent
////   to prepare for starting normalization from the additional bit.
////   For a number of times equal to the maximum number of bits in the
////   result mantissa including the leading 1 while normalization is
////   not complete,
////     if the most significant bit of the result mantissa is not 1,
////       decrement the result exponent;
////     shift the result mantissa left 1 bit w/0.
//// Actions: //// RES_EXP = RES_EXP + 1 //// RES_MAN = RES_MAN << 10 //// clear done //// repeat 54: //// if done == 1 then do nothing //// else done == 0 so //// if msb(RES_MAN) == 1 then //// RES_MAN = RES_MAN << 1 //// done = 1 //// else msb(RES_MAN) == 0 so //// RES_EXP = RES_EXP - 1 //// RES_MAN = RES_MAN << 1 //// Output State: //// RES_MAN -> RE //// RES_EXP -> R4, IN1 //// sign(RES_MAN) -> sign_res //// msb(RES_MAN) -> manthad1 //// ANDEXP -> R6 //// // carry_word( IN1 (RES_EXP) + 1 ) -> RR do({ NMEM, CI1, NDIS }); // OUT -> IN2 (0) // IN1 + 1 + RR -> R4, OUT (RES_EXP += 1) do({ OIN2, RR, NSH, SUM1S, R4, SUM1S, NCAR, NDIS }); // OUT -> IN1 (RES_EXP) // RE << 10 -> RE (RES_MAN) // 0 -> done e_in_0 = 0; do({ OIN1, RE, `SH 10, SRC, RE, ZEROS, NCAR, NDIS }); done = 0; repeat (54) if (done) // done normalizing begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); do({ NOOP }); end else // not done normalizing begin manthad1 = flags_0[REG_MSB_F]; if (manthad1) // msb of last shift = 1 begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); // RE << 1 -> RE (RES_MAN) // 1 -> done do({ NMEM, RE, `SH 1, SRC, RE, ZEROS, NCAR, NDIS }); done = 1; end else // msb of last shift = 0 begin // carry_word( IN1 (RES_EXP) + ~IN2 (~0) ) -> RR do({ NMEM, CD1, NDIS }); // IN1 + ~IN2 (~0) + RR -> R4, OUT (RES_EXP -= 1) do({ NMEM, RR, NSH, SUM1N2S, R4, SUM1N2S, NCAR, NDIS }); // OUT -> IN1 (RES_EXP) // RE << 1 -> RE (RES_MAN) do({ OIN1, RE, `SH 1, SRC, RE, ZEROS, NCAR, NDIS }); end end // of not done normalizing $display("5. Normalize: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 6. Encode //// Description: //// Encode the mantissa, exponent, and sign into a floating-point //// word. Handle special cases zero, overflow, and underflow. 
//// Actions: //// if true_msb(RES_MAN) == 0 || RES_EXP < 0 then RESULT = 0 //// else RESULT <> 0 so //// if RES_EXP with maximum allowed bits masked off == 0 //// then 0 <= |RESULT| <= max so //// |RESULT| << 1 = (RES_MAN >> 11) | (RES_EXP << 53) //// else |RESULT| > maximum allowed bits so //// |RESULT| << 1 = maximum value //// RESULT = |RESULT| >> 1 w/sign //// store RESULT //// if ( ~manthad1 | flags_0[IN1_MSB_F] ) // RESULT = 0 begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); do({ NOOP }); // 0 -> OUT, R6 (RESULT = 0) do({ NMEM, R6, NSH, ZEROS, R6, ZEROS, NCAR, NDIS }); end else // RESULT <> 0 begin // R6 -> OUT (ANDEXP) do({ NMEM, R6, NSH, SRC, R6, SRC, NCAR, NDIS }); // OUT -> IN2 (ANDEXP) // RE -> RW (RES_MAN) do({ OIN2, RE, NSH, SRC, RW, ZEROS, NCAR, NDIS }); // IN1 inverse_masked_by IN2 -> OUT (excess RES_EXP) // IN1 -> RE (RES_EXP) do({ NMEM, R0, NSH, IN1, RE, `I 00110000, NCAR, NDIS }); if ( flags_0[OUT_ZER_F] ) // 0 <= |RESULT| <= max begin // RW >> 11 -> OUT (RES_MAN) do({ NMEM, RW, `SH 11, ZEROS, R9, SRC, NCAR, NDIS }); // OUT -> IN2 (RES_MAN) // RE << 53 -> R4 (RES_EXP) do({ OIN2, RE, `SH 53, SRC, R4, ZEROS, NCAR, NDIS }); // IN2 | R4 -> RW (RESULT = RES_MAN | RES_EXP) do({ NMEM, R4, NSH, OR2S, RW, ZEROS, NCAR, NDIS }); end else // |RESULT| > max begin // squander clock cycle(s) for synchronization do({ NOOP }); do({ NOOP }); // ones -> RW (RESULT = limit) do({ NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }); end // sign_res -> w_in_0 // RW >> 1 -> OUT, R6 (RESULT >>= 1 w/sign) w_in_0 = sign_res; do({ NMEM, RW, `SH 1, SRC, R6, SRC, NCAR, NDIS }); end // of else RESULT <> 0 // OUT -> RES (RESULT) do({ A_RES, OMEM, NALU, NCAR, NDIS }); $display("6. 
Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); $display; word = pe_0.mem[A_OP1]; $display(" OP1: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); word = pe_0.mem[A_OP2]; if (subtract) $write("- "); else $write("+ "); $display("OP2: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); word = pe_0.mem[A_RES]; $display("= RES: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); $display; dump; end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task do; input [`ADDR_WIDTH+44-1:0] i_0; begin instr_0 = i_0; #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc14-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:15:30 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc14-3.v" Highest level modules: pe0 pe pe2 pe4 pc14a Addition/Subtraction, Floating Point, 64-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 1. Load and Decode: 10 cycles 2. Align: 216 cycles 3. Add or Subtract: 5 cycles 4. Complement: 3 cycles 5. Normalize: 165 cycles 6. 
Encode: 8 cycles Total: 407 cycles OP1: sign 0, exponent 10000000000, mantissa 1000000000000000000000000000000000000000000000000000 - OP2: sign 0, exponent 10000000001, mantissa 1100000000000000000000000000000000000000000000000000 = RES: sign 1, exponent 10000000001, mantissa 0000000000000000000000000000000000000000000000000000 PE "PE_0" Port and Register Dump # 0, Clock Cycle # 407 PE_INSTR: mb_addr 0010 mb_srce mb_d_mem mb_d_in2 mb_d_in1 0 1 0 0 srce_reg shift reg_res dest_reg out_res 0000 000000 10101010 0000 00000000 car_in1 car_in2 car_nin1 car_nin2 car_srce car_val 0 0 0 0 0 0 alu_dis alu_dis_i mb_dis mb_dis_i (p) 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 0 0 0 1 0 1 0 0 disab carry (r) 0 1 IN1: 0000000000000000000000000000000000000000000000000000010000000001 (r) IN2: 0000000000000000000000000000000000000000000000000000000000000000 (r) OUT: 0000000000000000000000000000000000000000000000000000000000000000 (r) R0: 0000000000000000000000000000000000000000000000000000010000000001 (r) R1: 0000000000001100000000000000000000000000000000000000000000000000 (r) R2: 0000000000000000000000000000000000000000000000000000010000000001 (r) R3: 0000000000011100000000000000000000000000000000000000000000000000 (r) R4: 1000000000100000000000000000000000000000000000000000000000000000 (r) R5: 0100000000011100000000000000000000000000000000000000000000000000 (r) R6: 1100000000010000000000000000000000000000000000000000000000000000 (r) R7: 0000000000000000000000000000000000000000000000000000000000000000 (r) R8: 0000000000000000000000000000000000000000000000000000000000000000 (r) R9: 0000000000000000000000000000000000000000000000000000000000000000 (r) DISABLE: 0000000000000000000000000000000000000000000000000000000000000000 (r) ROUTER: 1111111111111111111111111111111111111111111111111111111111111100 (r) R_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p/r) Note reading from R_IN is really reading from ROUTER! 
NORTH: 0000000000000000000000000000000000000000000000000000000000000000 (r) N_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p) SOUTH: 0000000000000000000000000000000000000000000000000000000000000000 (r) S_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p) EAST: 0000000000000000000000000000000000000000000000000000010000000001 (r) E_IN: 0 (p) W_IN: 1 (p) WEST: 1000000000100000000000000000000000000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 407 0: 4008000000000000 401c000000000000 2: c010000000000000 000fffffffffffff 4: 0010000000000000 00000000000007ff 6: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx 8: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx a: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx c: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx e: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx 38 warnings 96126 simulation events CPU time: 1.4 secs to compile + 0.5 secs to link + 16.1 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:15:48 module pc15; // pc15-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 32-bit floating-point multiply with 1 PE // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 32 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, e_in_0, w_in_0, zero_res, sign_res; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0; wire w_out_0; wire [9:0] flags_0; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, w_in_0, 0, , , , w_out_0, , 0,,,,, dump_r_0, dump_m_0); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... // 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN_1 and/or // IN/2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. 
// car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation 
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! 
// 32-bit floating-point representation: // bits 0-22: 23 lower-order bits of 24-bit normalized unsigned mantissa // bits 23-30: 8-bit biased exponent (bias = $7F) // bit 31: 1-bit sign of mantissa // special floating-point values: // all 1s : positive maximum (not infinity) // all 1s except sign : negative maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point multiplication program: // 1. Load and Add // 2. Multiply // 3. Normalize and Encode // register assignments: // R0 = (operand 1) operand 1 mantissa // R1 = (operand 2) // R2 = AND-mask for isolating/testing right-justified exponent // R3 = result exponent // R4 = result // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5, // address of AND-mask for isolating/testing // right-justified exponent A_BIAS = `M 6; // address of exponent bias initial begin form_feed; $display("Multiplication, Floating Point, 32-Bit, 1 PE"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; pe_0.mem[A_ANDMAN] = `W 007fffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 00800000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 000000ff; // exponent AND-mask pe_0.mem[A_BIAS] = `W 0000007f; // exponent bias pe_0.mem[A_OP1] = { 1'b 0, 8'h 80, 23'b 10000000000000000000000 }; // 3 pe_0.mem[A_OP2] = { 1'b 0, 8'h 81, 23'b 01000000000000000000000 }; // 5 reset_regs; //// 1. Load and Add //// Description: //// Load operands; determine zeros, signs, exponents, and mantissas; //// add exponents and correct bias; set up for multiply. //// Note corrected biased exponent sum could be negative. //// Note if zero is determined, exponent and mantissa values are //// immaterial. 
//// Actions: //// zero(RES) = zero(OP1) | zero(OP2) //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES) //// RES_EXP = (OP1 >> 23 & ANDEXP) + (OP2 >> 23 & ANDEXP) - BIAS //// OP1_MAN = (OP1 & ANDMAN) | ORMAN //// OP2_MAN = (OP2 & ANDMAN) | ORMAN //// RES_MAN = 0 //// Output State: //// OP1_MAN -> R0 //// OP2_MAN -> IN2 //// RES_MAN -> OUT //// ANDEXP -> R2 //// RES_EXP -> R3 //// zero(RES) -> zero_res //// sign(RES) -> sign_res //// // OP1 -> IN1 // zero(OP1) -> zero_res // sign(OP1) -> sign_res do({ A_OP1, MIN1, NALU, NCAR, NDIS }); zero_res = flags_0[IN1_ZER_F]; sign_res = flags_0[IN1_MSB_F]; // IN1 -> RW (OP1) // ANDEXP -> IN2 do({ A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }); // IN1 -> R0 (OP1) // OP2 -> IN1 // zero(OP2) | zero_res -> zero_res // (sign(OP2) ^ sign_res) & ~zero_res -> sign_res do({ A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS }); zero_res = zero_res | flags_0[IN1_ZER_F]; sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res; // RW >> 23 & IN2 -> OUT (OP1_EXP = OP1 >> 23 & ANDEXP) // IN1 -> RW (OP2) w_in_0 = 0; do({ NMEM, RW, `SH 23, IN1, RW, AND2S, NCAR, NDIS }); // RW >> 23 & IN2 -> OUT (OP2_EXP = OP2 >> 23 & ANDEXP) // IN1 -> R1 (OP2) // OUT -> IN1 (OP1_EXP) do({ OIN1, RW, `SH 23, IN1, R1, AND2S, NCAR, NDIS }); // IN2 -> R2 (ANDEXP) // OUT -> IN2 (OP2_EXP) do({ OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, NDIS }); // carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP) // BIAS -> IN2 do({ A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS }); // OUT -> IN1 (RES_EXP) do({ OIN1, NALU, NCAR, NDIS }); // carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR do({ NMEM, C1M2, NDIS }); // IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS) // ANDMAN -> IN1 do({ A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS }); // ORMAN -> IN2 do({ A_ORMAN, MIN2, NALU, NCAR, NDIS }); // (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN) do({ NMEM, R1, NSH, SRC, R1, `I 
11101100, NCAR, NDIS }); // OUT -> IN2 (OP2_MAN) // (R0 & IN1) | IN2 -> R0 (OP1_MAN = (OP1 & ANDMAN) | ORMAN) // 0 -> OUT (RES_MAN = 0) do({ OIN2, R0, NSH, `I 11101100, R0, ZEROS, NCAR, NDIS }); $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Multiply //// Description: //// For a number of times equal to the maximum number of bits in a //// mantissa including the leading 1, //// rotate one mantissa right; //// if the msb of the rotated mantissa is 1, //// add the other mantissa to the result mantissa; //// shift the result mantissa right 1 with 0. //// Note the lower bits of the result mantissa are not required //// because the leading 1s of the operands guarantee the bit length //// of the result to be either the maximum or one bit less. //// Note the carry-in to the shift of the result mantissa is always 0 //// because the operands summed are many bits shorter than a word. //// Actions: //// repeat 24: //// OP1_MAN = OP1_MAN >> 1 w/lsb(OP1_MAN) (rotated) //// if msb(OP1_MAN) == 1 then RES_MAN = RES_MAN + OP2_MAN //// otherwise msb(OP1_MAN) == 0 so do nothing //// RES_MAN = RES_MAN >> 1 w/0 //// Output state: //// RES_MAN -> RW, IN1 //// ANDEXP -> R2 //// RES_EXP -> R3 //// zero(RES) -> zero_res //// sign(RES) -> sign_res //// repeat (24) begin // OUT -> IN1 (RES_MAN) // R0 -> RW (OP1_MAN) do({ OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }); // RW >> 1 w/w_out_0 -> R0 (OP1_MAN rotated right) w_in_0 = w_out_0; do({ NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }); if (flags_0[REG_MSB_F]) // OP1_MAN digit == 1, so add to PSUM begin // carry_word(IN1 (RES_MAN) + IN2 (OP2_MAN)) -> RR do({ NMEM, C1P2, NDIS }); // IN1 + IN2 + RR -> RW, OUT (RES_MAN = RES_MAN + OP2_MAN) do({ NMEM, RR, NSH, SUM12S, RW, SUM12S, NCAR, NDIS }); end else // OP1_MAN digit == 0, so do not add to PSUM begin // squander clock cycle(s) for equalization do({ NOOP }); // IN1 -> RW, OUT (RES_MAN) do({ NMEM, RW, NSH, IN1, RW, IN1, NCAR, NDIS }); end // OUT 
-> IN1 (RES_MAN) // RW >> 1 w/0 -> OUT (next RES_MAN = RES_MAN >> 1 w/0) w_in_0 = 0; do({ OIN1, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }); end $display("2. Multiply: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Normalize and Encode //// Description: //// If the mantissa is 1 bit too long, //// increment the exponent, and //// truncate the mantissa by 1 bit; //// determine if the exponent is greater than the maximum; //// encode the exponent, mantissa, and sign, and //// handle zero, overflow, and underflow. //// Actions: //// if RES_MAN[25] then //// RES_EXP = RES_EXP + 1 //// RES_MAN = RES_MAN >> 1 w/0 //// excess(RES_EXP) = RES_EXP inverse_masked_by ANDEXP //// if OP1 == 0 || OP2 == 0 || RES_EXP < 0 then RESULT = 0 //// else RESULT <> 0 so //// if excess(RES_EXP) == 0 then 0 <= |RESULT| <= maximum so //// |RESULT| = (RES_EXP << 24) | RES_MAN //// else excess(RES_EXP) <> 0 so |RESULT| > maximum so //// |RESULT| = maximum //// RESULT = |RESULT| >> 1 w/sign //// store RESULT //// // IN1 -> RE (RES_MAN) // R3 -> OUT (RES_EXP) do({ NMEM, R3, NSH, IN1, RE, SRC, NCAR, NDIS }); // OUT -> IN2 (RES_EXP) // RE << 7 -> RE (RES_MAN << 7) e_in_0 = 0; do({ OIN2, RE, `SH 7, SRC, RE, ZEROS, NCAR, NDIS }); if (flags_0[OUT_MSB_F]) // extra bit exists, so RES_MAN 1 bit too large begin // RE << 1 -> RW (RES_MAN = RES_MAN w/o leading 1) do({ NMEM, RE, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }); // carry_word( IN2 (RES_EXP) + 1 ) -> RR do({ NMEM, CI2, NDIS }); // IN2 + 1 + RR -> RE, OUT (RES_EXP = RES_EXP + 1) do({ NMEM, RR, NSH, SUM2S, RE, SUM2S, NCAR, NDIS }); end else // no extra bit exists, so RES_MAN is normalized begin // RE << 2 -> RW (RES_MAN = RES_MAN w/o leading 1) do({ NMEM, RE, `SH 2, SRC, RW, ZEROS, NCAR, NDIS }); // squander clock cycle(s) for equalization do({ NOOP }); // IN2 -> RE, OUT (RES_EXP) do({ NMEM, RE, NSH, IN2, RE, IN2, NCAR, NDIS }); end // sign(RES_EXP) | zero_res -> zero_res // OUT -> IN2 (RES_EXP) // RW >> 8 -> OUT (RES_MAN = RES_MAN >> 8 
w/0) zero_res = zero_res | flags_0[OUT_MSB_F]; do({ OIN2, RW, `SH 8, SRC, RW, SRC, NCAR, NDIS }); // OUT -> IN1 (RES_MAN) // IN2 inverse_masked_by R2 -> OUT (excess RES_EXP using ANDEXP) do({ OIN1, R2, NSH, SRC, R2, `I 01000100, NCAR, NDIS }); if (zero_res) // RESULT = 0 // 0 -> RW ( |RESULT| = 0) do({ NMEM, RW, NSH, ZEROS, RW, ZEROS, NCAR, NDIS }); else // RESULT <=> 0 if (flags_0[OUT_ZER_F]) // 0 <= |RESULT| <= max // (RE << 24) | IN1 -> RW ( |RESULT| = (RES_EXP << 24) | RES_MAN ) do({ NMEM, RE, `SH 24, OR1S, RW, ZEROS, NCAR, NDIS }); else // |RESULT| > max // ones -> RW ( |RESULT| = limit ) do({ NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }); // sign_res -> w_in_0 // RW >> 1 -> OUT, R4 (RESULT >> 1 w/sign) w_in_0 = sign_res; do({ NMEM, RW, `SH 1, SRC, R4, SRC, NCAR, NDIS }); // OUT -> RES (RESULT) do({ A_RES, OMEM, NALU, NCAR, NDIS }); $display("3. Normalize and Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); dump; word = pe_0.mem[A_OP1]; $display(" OP1: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_OP2]; $display("x OP2: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_RES]; $display("= RES: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task do; input [`ADDR_WIDTH+44-1:0] i_0; begin instr_0 = i_0; #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc15-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:15:55 * Copyright Cadence Design 
Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc15-3.v" Warning! Port sizes differ in port connection (port 14) [Verilog-PCDPC] "pc15-3.v", 28: 0 Highest level modules: pe0 pe pe2 pe4 pc15 Multiplication, Floating Point, 32-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 1. Load and Add: 14 cycles 2. Multiply: 120 cycles 3. Normalize and Encode: 10 cycles Total: 144 cycles PE "PE_0" Port and Register Dump # 0, Clock Cycle # 144 PE_INSTR: mb_addr 0010 mb_srce mb_d_mem mb_d_in2 mb_d_in1 0 1 0 0 srce_reg shift reg_res dest_reg out_res 0000 000000 10101010 0000 00000000 car_in1 car_in2 car_nin1 car_nin2 car_srce car_val 0 0 0 0 0 0 alu_dis alu_dis_i mb_dis mb_dis_i (p) 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 1 0 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 00000000111000000000000000000000 (r) IN2: 00000000000000000000000010000010 (r) OUT: 00000000000000000000000000000000 (r) R0: 11000000000000000000000000000000 (r) R1: 00000000000000000000000000000000 (r) R2: 00000000000000000000000011111111 (r) R3: 00000000000000000000000010000010 (r) R4: 01000001011100000000000000000000 (r) R5: 00000000000000000000000000000000 (r) R6: 00000000000000000000000000000000 (r) R7: 00000000000000000000000000000000 (r) R8: 00000000000000000000000000000000 (r) R9: 00000000000000000000000000000000 (r) DISABLE: 00000000000000000000000000000000 (r) ROUTER: 00000000000000000000000000000000 (r) R_IN: 00000000000000000000000000000000 (p/r) Note reading from R_IN is really reading from ROUTER! 
NORTH: 00000000000000000000000000000000 (r) N_IN: 00000000000000000000000000000000 (p) SOUTH: 00000000000000000000000000000000 (r) S_IN: 00000000000000000000000000000000 (p) EAST: 00000000000000000000000010000010 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 10000010111000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 144 0: 40400000 40a00000 41700000 007fffff 4: 00800000 000000ff 0000007f xxxxxxxx 8: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx c: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx OP1: sign 0, exponent 10000000 ($80), mantissa 10000000000000000000000 x OP2: sign 0, exponent 10000001 ($81), mantissa 01000000000000000000000 = RES: sign 0, exponent 10000010 ($82), mantissa 11100000000000000000000 35 warnings 20949 simulation events CPU time: 1.4 secs to compile + 0.5 secs to link + 3.4 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:16:00

module pc16;
// pc16-3.v
// a Verilog-XL behavioral model of a reconfigurable processor configuration
// for a 64-bit floating-point multiply with 1 PE
// (using complementation and variable-bit-shift EAST & WEST registers)
// (highest level module; requires module pe3 in file pe.v)
// for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4
// by Ken Winiecki
//
// Overview: this module is a self-contained testbench. It instantiates one
// PE, preloads masks and two operands into the PE memory, drives the PE one
// instruction per clock through the "do" task, stores the product at A_RES,
// and prints cycle counts plus the decoded operands and result.

// system parameters:
`define WORD_WIDTH 64 // width of PE word, in bits
`define ADDR_WIDTH 4 // width of PE memory address, in bits
`define MEM_LENGTH 16 // length of PE memory, in words
`define MHZ 16.7 // clock speed, in "megahertz"
// "`define"s are used to circumnavigate a Verilog-XL bug that
// prevents "parameter"s from working as bit length specifiers!!!

// variable declarations:
reg clock, reset, dump_r_0, dump_m_0, e_in_0, w_in_0, zero_res, sign_res;
reg [`WORD_WIDTH-1:0] word;
// instruction word: address field plus 44 bits of fixed-width fields
// (4 memory-bus bits + 30 ALU bits + 6 carry bits + 4 disable bits, as
// listed in the field table below)
reg [`ADDR_WIDTH+44-1:0] instr_0;
wire w_out_0;
wire [9:0] flags_0;
integer clock_ct, old_clock_ct;

// PE instances and connections:
// NOTE(review): connections are positional; the port order is assumed to
// match the PE port list declared in pe.v (pe3 is not shown here) -- confirm.
// Empty positions leave the corresponding port unconnected.
pe3 pe_0 (clock, reset, instr_0, flags_0, 0, 0, e_in_0, w_in_0, 0, , , , w_out_0, , 0,,,,, dump_r_0, dump_m_0);

defparam // set PE-instance parameters to system parameters:
  pe_0.ADDR_WIDTH = `ADDR_WIDTH,
  pe_0.WORD_WIDTH = `WORD_WIDTH,
  pe_0.MEM_LENGTH = `MEM_LENGTH,
  pe_0.PE_NAME = "PE_0";

// PE instruction fields and bit widths:
//
//   mb_addr        mb_srce  mb_d_mem  mb_d_in2  mb_d_in1  ...
//   (`ADDR_WIDTH)     1        1         1         1
//
//   srce_reg  shift  reg_res  dest_reg  out_res  ...
//      4        6       8        4         8
//
//   car_in1  car_in2  car_nin1  car_nin2  car_srce  car_val  ...
//      1        1        1         1         1         1
//
//   alu_dis  alu_dis_i  mb_dis  mb_dis_i
//      1         1        1        1

parameter // PE instruction field descriptions and values:

  // mb_addr: memory bus transfer address

  // mb_srce: source of memory bus transfer is ...
  OUT = 1'b0, // ... register "OUT"
  MEM = 1'b1, // ... memory

  // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers
  F = 1'b0, // false
  T = 1'b1, // true

  // srce_reg & dest_reg: source and destination registers of ALU operations
  R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011,
  R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111,
  R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011,
  RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111,

  // shift: shift east or west source register "shift" bits before using

  // reg_res & out_res: ALU operations for "OUT" and destination reg results
  // Each 8-bit code reads as a truth table whose three input columns are the
  // constants IN1, IN2 and SRC below (e.g. AND2S = IN2 & SRC bit-for-bit).
  IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010,
  NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101,
  ZEROS = 8'b00000000, ONES = 8'b11111111,
  AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100,
  NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011,
  AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010,
  NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101,
  SUMN1S = NXOR1S, SUM1S = XOR1S,
  AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110,
  NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001,
  SUMN2S = NXOR2S, SUM2S = XOR2S,
  // NOTE(review): XOR12S/NXOR12S are "not all equal"/"all equal" of the
  // three inputs, not the 3-input parity; the parity pattern is SUM12S.
  AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110,
  NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001,
  SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S;

// Note: The PE only does EITHER an ALU OR a carry operation in one
// instruction cycle. A carry word is computed using registers IN1 and/or
// IN2 and a carry-in bit, and a carry operation is implied by specifying
// the registers to use. The carry word is made available at the input
// from the router, r_in, and the carry-out is placed in the carry flag of
// the p_flags register.

// car_in1: use "IN1" in carry word computation; T or F
// car_in2: use "IN2" in carry word computation; T or F
// car_nin1: use inverse of "IN1" (if use was specified); T or F
// car_nin2: use inverse of "IN2" (if use was specified); T or F
// car_srce: use value for carry-in (or else use carry flag); T or F
// car_val: use 1 for value of carry-in (if use was specified); T or F

// alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F
// alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F

parameter // bit positions of PE flags:
  // Note that flags other than carry are not affected by a carry operation,
  // and the carry flag is not affected by an ALU operation.
  REG_MSB_F = 9, // m.s.b. of destination register
  REG_ZER_F = 8, // if destination register = 0
  OUT_MSB_F = 7, // m.s.b. of register OUT
  OUT_ZER_F = 6, // if register OUT = 0
  IN2_MSB_F = 5, // m.s.b. of register IN2
  IN2_ZER_F = 4, // if register IN2 = 0
  IN1_MSB_F = 3, // m.s.b. of register IN1
  IN1_ZER_F = 2, // if register IN1 = 0
  DISABLE_F = 1, // state of PE disable bit
  CARRY_F = 0; // carry-out of last carry operation

// convenience parameters and definitions for writing PE instructions:
parameter NS = 500/`MHZ; // when specifying clock timing
`define W `WORD_WIDTH'h // for specifying memory values (in hex)
`define M `ADDR_WIDTH'h // for specifying memory address (in hex)
parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1"
parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2"
parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory
parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1"
parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2"
parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus
`define SH 6'd // for specifying shift (in decimal)
parameter NSH = `SH 0; // when not shifting (not using east/west srce regs)
`define I 8'b // for specifying ALU operation
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops
// (note that flags are affected and OUT is cleared)
parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations
parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2"
parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2"
parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1"
parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1"
parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2"
parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1"
parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2"
parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0
parameter NDIS = { F, F, F, F }; // when not allowing any disabling
parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles
// (note that flags are affected and OUT is cleared)

// Write a program in Verilog-XL to control the processing elements directly,
// and put it in the "initial" block to execute. Set up the PE memory.
// Reset the PE by clocking its p_reset line. (Clock a line using #NS to
// delay each transition.) Build a PE instruction using the above defined
// instruction fields and value parameters. Clock the PEs. Monitor the
// flags of a PE using the above bit position parameters. Note: Verilog-XL
// has no way of dealing realistically with "don't cares," so never allow the
// use of "don't knows" (undefined values)!!!

// 64-bit floating-point representation:
// bits 0-51: 52 lower-order bits of 53-bit normalized unsigned mantissa
// bits 52-62: 11-bit biased exponent (bias = $3ff)
// bit 63: 1-bit sign of mantissa
// special floating-point values:
// all 1s : positive maximum (not infinity)
// all 1s except sign : negative maximum (not infinity)
// all zeros : zero
// all zeros except sign : negative minimum (not zero)
// NOTE(review): the positive operands 3 and 5 below are encoded with sign
// bit 0, so "all 1s" (sign bit 1) looks like the NEGATIVE maximum and "all 1s
// except sign" the positive one; the two labels above may be swapped -- confirm.

// floating-point multiplication program:
// 1. Load and Add
// 2. Multiply
// 3. Normalize and Encode

// register assignments:
// R0 = (operand 1) operand 1 mantissa
// R1 = (operand 2)
// R2 = AND-mask for isolating/testing right-justified exponent
// R3 = result exponent
// R4 = result

// memory address definitions:
parameter
  A_OP1 = `M 0, // address of first operand
  A_OP2 = `M 1, // address of second operand
  A_RES = `M 2, // address of result
  A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa
  A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of
                  // mantissa
  A_ANDEXP = `M 5, // address of AND-mask for isolating/testing
                   // right-justified exponent
  A_BIAS = `M 6; // address of exponent bias

initial
  begin
    form_feed;
    $display("Multiplication, Floating Point, 64-Bit, 1 PE");
    clock = 0;
    clock_ct = 0;
    old_clock_ct = 0;
    dump_r_0 = 0;
    dump_m_0 = 0;
    // ++++----++++----
    pe_0.mem[A_ANDMAN] = `W 000fffffffffffff; // mantissa AND-mask
    pe_0.mem[A_ORMAN] = `W 0010000000000000; // mantissa OR-mask
    pe_0.mem[A_ANDEXP] = `W 00000000000007ff; // exponent AND-mask
    pe_0.mem[A_BIAS] = `W 00000000000003ff; // exponent bias
    // +++----++++
    pe_0.mem[A_OP1] = { 1'b 0, 11'b 10000000000, 52'b 1000000000000000000000000000000000000000000000000000 }; // 3
    // ++++----++++----++++----++++----++++----++++----++++
    pe_0.mem[A_OP2] = { 1'b 0, 11'b 10000000001, 52'b 0100000000000000000000000000000000000000000000000000 }; // 5
    reset_regs;

    //// 1. Load and Add
    //// Description:
    //// Load operands; determine zeros, signs, exponents, and mantissas;
    //// add exponents and correct bias; set up for multiply.
    //// Note corrected biased exponent sum could be negative.
    //// Note if zero is determined, exponent and mantissa values are
    //// immaterial.
    //// Actions:
    //// zero(RES) = zero(OP1) | zero(OP2)
    //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES)
    //// RES_EXP = (OP1 >> 52 & ANDEXP) + (OP2 >> 52 & ANDEXP) - BIAS
    //// OP1_MAN = (OP1 & ANDMAN) | ORMAN
    //// OP2_MAN = (OP2 & ANDMAN) | ORMAN
    //// RES_MAN = 0
    //// Output State:
    //// OP1_MAN -> R0
    //// OP2_MAN -> IN2
    //// RES_MAN -> OUT
    //// ANDEXP -> R2
    //// RES_EXP -> R3
    //// zero(RES) -> zero_res
    //// sign(RES) -> sign_res
    ////
    // OP1 -> IN1
    // zero(OP1) -> zero_res
    // sign(OP1) -> sign_res
    do({ A_OP1, MIN1, NALU, NCAR, NDIS });
    zero_res = flags_0[IN1_ZER_F];
    sign_res = flags_0[IN1_MSB_F];
    // IN1 -> RW (OP1)
    // ANDEXP -> IN2
    do({ A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS });
    // IN1 -> R0 (OP1)
    // OP2 -> IN1
    // zero(OP2) | zero_res -> zero_res
    // (sign(OP2) ^ sign_res) & ~zero_res -> sign_res
    do({ A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS });
    zero_res = zero_res | flags_0[IN1_ZER_F];
    sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res;
    // RW >> 52 & IN2 -> OUT (OP1_EXP = OP1 >> 52 & ANDEXP)
    // IN1 -> RW (OP2)
    w_in_0 = 0;
    do({ NMEM, RW, `SH 52, IN1, RW, AND2S, NCAR, NDIS });
    // RW >> 52 & IN2 -> OUT (OP2_EXP = OP2 >> 52 & ANDEXP)
    // IN1 -> R1 (OP2)
    // OUT -> IN1 (OP1_EXP)
    do({ OIN1, RW, `SH 52, IN1, R1, AND2S, NCAR, NDIS });
    // IN2 -> R2 (ANDEXP)
    // OUT -> IN2 (OP2_EXP)
    do({ OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, NDIS });
    // carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR
    do({ NMEM, C1P2, NDIS });
    // IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP)
    // BIAS -> IN2
    do({ A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS });
    // OUT -> IN1 (RES_EXP)
    do({ OIN1, NALU, NCAR, NDIS });
    // carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR
    do({ NMEM, C1M2, NDIS });
    // IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS)
    // ANDMAN -> IN1
    do({ A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS });
    // ORMAN -> IN2
    do({ A_ORMAN, MIN2, NALU, NCAR, NDIS });
    // (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN)
    do({ NMEM, R1, NSH, SRC, R1, `I 11101100, NCAR, NDIS });
    // OUT -> IN2 (OP2_MAN)
    // (R0 & IN1) | IN2 -> R0 (OP1_MAN = (OP1 & ANDMAN) | ORMAN)
    // 0 -> OUT (RES_MAN = 0)
    do({ OIN2, R0, NSH, `I 11101100, R0, ZEROS, NCAR, NDIS });
    $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct);
    old_clock_ct = clock_ct;

    //// 2. Multiply
    //// Description:
    //// For a number of times equal to the maximum number of bits in a
    //// mantissa including the leading 1,
    //// rotate one mantissa right;
    //// if the msb of the rotated mantissa is 1,
    //// add the other mantissa to the result mantissa;
    //// shift the result mantissa right 1 with 0.
    //// Note the lower bits of the result mantissa are not required
    //// because the leading 1s of the operands guarantee the bit length
    //// of the result to be either the maximum or one bit less.
    //// Note the carry-in to the shift of the result mantissa is always 0
    //// because the operands summed are many bits shorter than a word.
    //// Actions:
    //// repeat 53:
    //// OP1_MAN = OP1_MAN >> 1 w/lsb(OP1_MAN) (rotated)
    //// if msb(OP1_MAN) == 1 then RES_MAN = RES_MAN + OP2_MAN
    //// otherwise msb(OP1_MAN) == 0 so do nothing
    //// RES_MAN = RES_MAN >> 1 w/0
    //// Output state:
    //// RES_MAN -> RW, IN1
    //// ANDEXP -> R2
    //// RES_EXP -> R3
    //// zero(RES) -> zero_res
    //// sign(RES) -> sign_res
    ////
    // Each iteration issues 5 instructions on either branch (the else branch
    // pads with a NOOP), so this phase always takes 53 * 5 clock cycles.
    repeat (53)
      begin
        // OUT -> IN1 (RES_MAN)
        // R0 -> RW (OP1_MAN)
        do({ OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS });
        // RW >> 1 w/w_out_0 -> R0 (OP1_MAN rotated right)
        w_in_0 = w_out_0;
        do({ NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS });
        if (flags_0[REG_MSB_F]) // OP1_MAN digit == 1, so add to PSUM
          begin
            // carry_word(IN1 (RES_MAN) + IN2 (OP2_MAN)) -> RR
            do({ NMEM, C1P2, NDIS });
            // IN1 + IN2 + RR -> RW, OUT (RES_MAN = RES_MAN + OP2_MAN)
            do({ NMEM, RR, NSH, SUM12S, RW, SUM12S, NCAR, NDIS });
          end
        else // OP1_MAN digit == 0, so do not add to PSUM
          begin
            // squander clock cycle(s) for equalization
            do({ NOOP });
            // IN1 -> RW, OUT (RES_MAN)
            do({ NMEM, RW, NSH, IN1, RW, IN1, NCAR, NDIS });
          end
        // OUT -> IN1 (RES_MAN)
        // RW >> 1 w/0 -> OUT (next RES_MAN = RES_MAN >> 1 w/0)
        w_in_0 = 0;
        do({ OIN1, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS });
      end
    $display("2. Multiply: %d cycles", clock_ct - old_clock_ct);
    old_clock_ct = clock_ct;

    //// 3. Normalize and Encode
    //// Description:
    //// If the mantissa is 1 bit too long,
    //// increment the exponent, and
    //// truncate the mantissa by 1 bit;
    //// determine if the exponent is greater than the maximum;
    //// encode the exponent, mantissa, and sign, and
    //// handle zero, overflow, and underflow.
    //// Actions:
    //// if RES_MAN[54] then
    //// RES_EXP = RES_EXP + 1
    //// RES_MAN = RES_MAN >> 1 w/0
    //// excess(RES_EXP) = RES_EXP inverse_masked_by ANDEXP
    //// if OP1 == 0 || OP2 == 0 || RES_EXP < 0 then RESULT = 0
    //// else RESULT <> 0 so
    //// if excess(RES_EXP) == 0 then 0 <= |RESULT| <= maximum so
    //// |RESULT| = (RES_EXP << 53) | RES_MAN
    //// else excess(RES_EXP) <> 0 so |RESULT| > maximum so
    //// |RESULT| = maximum
    //// RESULT = |RESULT| >> 1 w/sign
    //// store RESULT
    ////
    // IN1 -> RE (RES_MAN)
    // R3 -> OUT (RES_EXP)
    do({ NMEM, R3, NSH, IN1, RE, SRC, NCAR, NDIS });
    // OUT -> IN2 (RES_EXP)
    // RE << 10 -> RE (RES_MAN << 10)
    e_in_0 = 0;
    do({ OIN2, RE, `SH 10, SRC, RE, ZEROS, NCAR, NDIS });
    if (flags_0[OUT_MSB_F]) // extra bit exists, so RES_MAN 1 bit too large
      begin
        // RE << 1 -> RW (RES_MAN = RES_MAN w/o leading 1)
        do({ NMEM, RE, `SH 1, SRC, RW, ZEROS, NCAR, NDIS });
        // carry_word( IN2 (RES_EXP) + 1 ) -> RR
        do({ NMEM, CI2, NDIS });
        // IN2 + 1 + RR -> RE, OUT (RES_EXP = RES_EXP + 1)
        do({ NMEM, RR, NSH, SUM2S, RE, SUM2S, NCAR, NDIS });
      end
    else // no extra bit exists, so RES_MAN is normalized
      begin
        // RE << 2 -> RW (RES_MAN = RES_MAN w/o leading 1)
        do({ NMEM, RE, `SH 2, SRC, RW, ZEROS, NCAR, NDIS });
        // squander clock cycle(s) for equalization
        do({ NOOP });
        // IN2 -> RE, OUT (RES_EXP)
        do({ NMEM, RE, NSH, IN2, RE, IN2, NCAR, NDIS });
      end
    // sign(RES_EXP) | zero_res -> zero_res
    // OUT -> IN2 (RES_EXP)
    // RW >> 11 -> OUT (RES_MAN = RES_MAN >> 11 w/0)
    zero_res = zero_res | flags_0[OUT_MSB_F];
    do({ OIN2, RW, `SH 11, SRC, RW, SRC, NCAR, NDIS });
    // OUT -> IN1 (RES_MAN)
    // IN2 inverse_masked_by R2 -> OUT (excess RES_EXP using ANDEXP)
    do({ OIN1, R2, NSH, SRC, R2, `I 01000100, NCAR, NDIS });
    if (zero_res) // RESULT = 0
      // 0 -> RW ( |RESULT| = 0)
      do({ NMEM, RW, NSH, ZEROS, RW, ZEROS, NCAR, NDIS });
    else // RESULT <> 0
      if (flags_0[OUT_ZER_F]) // 0 <= |RESULT| <= max
        // (RE << 53) | IN1 -> RW ( |RESULT| = (RES_EXP << 53) | RES_MAN )
        do({ NMEM, RE, `SH 53, OR1S, RW, ZEROS, NCAR, NDIS });
      else // |RESULT| > max
        // ones -> RW ( |RESULT| = limit )
        do({ NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS });
    // sign_res -> w_in_0
    // RW >> 1 -> OUT, R4 (RESULT >> 1 w/sign)
    w_in_0 = sign_res;
    do({ NMEM, RW, `SH 1, SRC, R4, SRC, NCAR, NDIS });
    // OUT -> RES (RESULT)
    do({ A_RES, OMEM, NALU, NCAR, NDIS });
    $display("3. Normalize and Encode: %d cycles", clock_ct - old_clock_ct);
    $display(" Total: %d cycles", clock_ct);

    // Dump PE registers and memory, then print the operands and the result
    // split into sign / exponent / mantissa fields.
    dump;
    word = pe_0.mem[A_OP1];
    $display(" OP1: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] );
    $display(" mantissa %b", word[51:0] );
    word = pe_0.mem[A_OP2];
    $display("x OP2: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] );
    $display(" mantissa %b", word[51:0] );
    word = pe_0.mem[A_RES];
    $display("= RES: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] );
    $display(" mantissa %b", word[51:0] );
  end

// Pulse the reset line: high after NS time units, low NS time units later.
task reset_regs;
  begin
    #NS reset = 1;
    #NS reset = 0;
  end
endtask

// Issue one PE instruction: latch i_0 into instr_0, pulse clock once
// (NS per transition), and count the cycle in clock_ct.
// NOTE(review): "do" is a reserved word in SystemVerilog; this source targets
// Verilog-XL, so the task name would need changing for newer tools.
task do;
  input [`ADDR_WIDTH+44-1:0] i_0;
  begin
    instr_0 = i_0;
    #NS clock = 1;
    #NS clock = 0;
    clock_ct = clock_ct + 1;
  end
endtask

// Pulse the register-dump line, then the memory-dump line, of the PE.
task dump;
  begin
    #NS dump_r_0 = 1;
    #NS dump_r_0 = 0;
    #NS dump_m_0 = 1;
    #NS dump_m_0 = 0;
  end
endtask

// Write the character with octal code 14 (ASCII form feed) to the output.
task form_feed;
  $write("\14");
endtask

endmodule

Host command: verilog Command arguments: pe.v pc16-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:16:07 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc16-3.v" Highest level modules: pe0 pe pe2 pe4 pc16 Multiplication, Floating Point, 64-Bit, 1 PE PE "PE_0" Reset, Clock Cycle # 0 1. Load and Add: 14 cycles 2. Multiply: 265 cycles 3. 
Normalize and Encode: 10 cycles Total: 289 cycles PE "PE_0" Port and Register Dump # 0, Clock Cycle # 289 PE_INSTR: mb_addr 0010 mb_srce mb_d_mem mb_d_in2 mb_d_in1 0 1 0 0 srce_reg shift reg_res dest_reg out_res 0000 000000 10101010 0000 00000000 car_in1 car_in2 car_nin1 car_nin2 car_srce car_val 0 0 0 0 0 0 alu_dis alu_dis_i mb_dis mb_dis_i (p) 0 0 0 0 FLAGS: reg<0 reg=0 out<0 out=0 in2<0 in2=0 in1<0 in1=0 1 0 0 1 0 0 0 0 disab carry (r) 0 0 IN1: 0000000000011100000000000000000000000000000000000000000000000000 (r) IN2: 0000000000000000000000000000000000000000000000000000010000000010 (r) OUT: 0000000000000000000000000000000000000000000000000000000000000000 (r) R0: 1100000000000000000000000000000000000000000000000000000000000000 (r) R1: 0000000000000000000000000000000000000000000000000000000000000000 (r) R2: 0000000000000000000000000000000000000000000000000000011111111111 (r) R3: 0000000000000000000000000000000000000000000000000000010000000010 (r) R4: 0100000000101110000000000000000000000000000000000000000000000000 (r) R5: 0000000000000000000000000000000000000000000000000000000000000000 (r) R6: 0000000000000000000000000000000000000000000000000000000000000000 (r) R7: 0000000000000000000000000000000000000000000000000000000000000000 (r) R8: 0000000000000000000000000000000000000000000000000000000000000000 (r) R9: 0000000000000000000000000000000000000000000000000000000000000000 (r) DISABLE: 0000000000000000000000000000000000000000000000000000000000000000 (r) ROUTER: 0000000000000000000000000000000000000000000000000000000000000000 (r) R_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p/r) Note reading from R_IN is really reading from ROUTER! 
NORTH: 0000000000000000000000000000000000000000000000000000000000000000 (r) N_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p) SOUTH: 0000000000000000000000000000000000000000000000000000000000000000 (r) S_IN: 0000000000000000000000000000000000000000000000000000000000000000 (p) EAST: 0000000000000000000000000000000000000000000000000000010000000010 (r) E_IN: 0 (p) W_IN: 0 (p) WEST: 1000000001011100000000000000000000000000000000000000000000000000 (r) PE "PE_0" Memory Dump # 0, Clock Clycle # 289 0: 4008000000000000 4014000000000000 2: 402e000000000000 000fffffffffffff 4: 0010000000000000 00000000000007ff 6: 00000000000003ff xxxxxxxxxxxxxxxx 8: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx a: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx c: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx e: xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx OP1: sign 0, exponent 10000000000, mantissa 1000000000000000000000000000000000000000000000000000 x OP2: sign 0, exponent 10000000001, mantissa 0100000000000000000000000000000000000000000000000000 = RES: sign 0, exponent 10000000010, mantissa 1110000000000000000000000000000000000000000000000000 38 warnings 69569 simulation events CPU time: 1.4 secs to compile + 0.4 secs to link + 12.4 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:16:21 module pc17; // pc17-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 32-bit floating-point multiply with 2 PEs // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 32 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, e_in_0, w_in_0, zero_res, sign_res, dump_r_1, dump_m_1, e_in_1, w_in_1, digit_0, digit_1; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0, instr_1; wire w_out_0, w_out_1; wire [9:0] flags_0, flags_1; wire [`WORD_WIDTH-1:0] s0_n1, n1_s0; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, e_in_0, w_in_0, 0, , s0_n1, , w_out_0, , 0,,,,, dump_r_0, dump_m_0), pe_1 (clock, reset, instr_1, flags_1, s0_n1, 0, e_in_1, w_in_1, 0, n1_s0, , , w_out_1, , 0,,,,, dump_r_1, dump_m_1); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0", pe_1.ADDR_WIDTH = `ADDR_WIDTH, pe_1.WORD_WIDTH = `WORD_WIDTH, pe_1.MEM_LENGTH = `MEM_LENGTH, pe_1.PE_NAME = "PE_1"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... // 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: source and destination registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN1 and/or // IN2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. 
// car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation 
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking its p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! 
// 32-bit floating-point representation: // bits 0-22: 23 lower-order bits of 24-bit normalized unsigned mantissa // bits 23-30: 8-bit biased exponent (bias = $7F) // bit 31: 1-bit sign of mantissa // special floating-point values: // all 1s : negative maximum (not infinity) // all 1s except sign : positive maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point multiplication program: // 1. Load and Add // 2. Multiply // 3. Reduce // 4. Normalize and Encode // register assignments: // R0 = (operand 1) operand 1 mantissa // R1 = (operand 2) // R2 = AND-mask for isolating/testing right-justified exponent // R3 = result exponent // R4 = result // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5, // address of AND-mask for isolating/testing // right-justified exponent A_BIAS = `M 6; // address of exponent bias initial begin form_feed; $display("Multiplication, Floating Point, 32-Bit, 2 PEs"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; dump_r_1 = 0; dump_m_1 = 0; pe_0.mem[A_ANDMAN] = `W 007fffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 00800000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 000000ff; // exponent AND-mask pe_0.mem[A_BIAS] = `W 0000007f; // exponent bias pe_1.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_1.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_1.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_1.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_0.mem[A_OP1] = { 1'b 0, 8'h 80, 23'b 10000000000000000000000 }; // 3 pe_0.mem[A_OP2] = { 1'b 0, 8'h 81, 23'b 01000000000000000000000 }; // 5 pe_1.mem[A_OP1] = pe_0.mem[A_OP1]; pe_1.mem[A_OP2] = pe_0.mem[A_OP2]; reset_regs; //// 1. 
Load and Add //// Description: //// Load operands; determine zeros, signs, exponents, and mantissas; //// add exponents and correct bias; set up for multiply. //// Note corrected biased exponent sum could be negative. //// Note if zero is determined, exponent and mantissa values are //// immaterial. //// Actions: //// zero(RES) = zero(OP1) | zero(OP2) //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES) //// RES_EXP = (OP1 >> 23 & ANDEXP) + (OP2 >> 23 & ANDEXP) - BIAS //// OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN //// OP1_MAN_1 = ( (OP1 & ANDMAN) | ORMAN ) >> 12 //// OP2_MAN = (OP2 & ANDMAN) | ORMAN //// RES_MAN_x = 0 //// Output State: //// 0: OP1_MAN_0 -> R0 //// 1: OP1_MAN_1 -> R0 //// x: OP2_MAN -> IN2 //// x: RES_MAN_x -> OUT //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // x: OP1 -> IN1 // -: zero(OP1) -> zero_res // -: sign(OP1) -> sign_res instr_0 = { A_OP1, MIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; zero_res = flags_0[IN1_ZER_F]; sign_res = flags_0[IN1_MSB_F]; // x: IN1 -> RW (OP1) // x: ANDEXP -> IN2 instr_0 = { A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: IN1 -> R0 (OP1) // x: OP2 -> IN1 // x: zero(OP2) | zero_res -> zero_res // x: (sign(OP2) ^ sign_res) & ~zero_res -> sign_res instr_0 = { A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; zero_res = zero_res | flags_0[IN1_ZER_F]; sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res; // x: RW >> 23 & IN2 -> OUT (OP1_EXP = OP1 >> 23 & ANDEXP) // x: IN1 -> RW (OP2) w_in_0 = 0; w_in_1 = 0; instr_0 = { NMEM, RW, `SH 23, IN1, RW, AND2S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: RW >> 23 & IN2 -> OUT (OP2_EXP = OP2 >> 23 & ANDEXP) // x: IN1 -> R1 (OP2) // x: OUT -> IN1 (OP1_EXP) instr_0 = { OIN1, RW, `SH 23, IN1, R1, AND2S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: IN2 -> R2 (ANDEXP) // x: OUT -> IN2 (OP2_EXP) instr_0 = { OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, 
NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP) // x: BIAS -> IN2 instr_0 = { A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: OUT -> IN1 (RES_EXP) instr_0 = { OIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR instr_0 = { NMEM, C1M2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS) // x: ANDMAN -> IN1 instr_0 = { A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: ORMAN -> IN2 instr_0 = { A_ORMAN, MIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN) instr_0 = { NMEM, R1, NSH, SRC, R1, `I 11101100, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: OUT -> IN2 (OP2_MAN) // 0: (R0 & IN1) | IN2 -> R0 (OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN) // 1: (R0 & IN1) | IN2 -> RW (OP1_MAN_1 = (OP1 & ANDMAN) | ORMAN) instr_0 = { OIN2, R0, NSH, `I 11101100, R0, ZEROS, NCAR, NDIS }; instr_1 = { OIN2, R0, NSH, `I 11101100, RW, ZEROS, NCAR, NDIS }; clockem; // x: 0 -> OUT (RES_MAN_x = 0) // 1: RW >> 12 -> R0 (OP1_MAN_1 = OP1_MAN_1 >> 12) instr_0 = { NMEM, NALU, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 12, SRC, R0, ZEROS, NCAR, NDIS }; clockem; $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Multiply //// Description: //// For a number of times equal to 1/2 the maximum number of bits in //// a mantissa including the leading 1, //// rotate one mantissa right; //// if the msb of the rotated mantissa is 1, //// add the other mantissa to the result mantissa; //// shift the result mantissa right 1 with 0. 
//// Note the lower bits of the result mantissa are not required //// because the leading 1s of the operands guarantee the bit length //// of the result to be either the maximum or one bit less. //// Note the carry-in to the shift of the result mantissa is always 0 //// because the operands summed are many bits shorter than a word. //// Actions: //// repeat 12: //// OP1_MAN_x = OP1_MAN_x >> 1 w/lsb(OP1_MAN_x) (rotated) //// if msb(OP1_MAN_x) == 1 then RES_MAN_x = RES_MAN_x + OP2_MAN //// else msb(OP1_MAN_x) == 0 so do nothing //// RES_MAN_x = RES_MAN_x >> 1 w/0 //// Output state: //// 0: RES_MAN_0 -> RW //// 1: RES_MAN_1 -> RW //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// repeat (12) begin // x: OUT -> IN1 (RES_MAN_x) // x: R0 -> RW (OP1_MAN_x) instr_0 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: RW >> 1 w/w_out_0 -> R0 (OP1_MAN_x rotated right) // x: msb(R0) -> digit_x w_in_0 = w_out_0; w_in_1 = w_out_1; instr_0 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; digit_0 = flags_0[REG_MSB_F]; digit_1 = flags_1[REG_MSB_F]; // x: carry_word(IN1 (RES_MAN_x) + IN2 (OP2_MAN)) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; if (digit_0) // OP1_MAN_0 digit == 1, so add to PSUM // 0: IN1 + IN2 + RR -> RW (RES_MAN_0 = RES_MAN_0 + OP2_MAN) instr_0 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_0 digit == 0, so do not add to PSUM // 0: IN1 -> RW (RES_MAN_0) instr_0 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_1) // OP1_MAN_1 digit == 1, so add to PSUM // 1: IN1 + IN2 + RR -> RW (RES_MAN_1 = RES_MAN_1 + OP2_MAN) instr_1 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_1 digit == 0, so do not add to PSUM // 1: IN1 -> RW (RES_MAN_1) instr_1 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // x: RW >> 1 w/0 -> OUT (next RES_MAN_x = RES_MAN_x >> 1 w/0) w_in_0 = 0; w_in_1 = 0; 
instr_0 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; end $display("2. Multiply: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Reduce //// Description: //// Reduce the result mantissa from the parallel components. //// Actions: //// RES_MAN = RES_MAN_0 >> 12 + RES_MAN_1 //// Output state: //// x: RES_MAN -> RE //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // 0: RW >> 12 -> RS, OUT (RES_MAN_0 = RES_MAN_0 >> 12) // 1: RW -> RN, OUT (RES_MAN_1) instr_0 = { NMEM, RW, `SH 12, SRC, RS, SRC, NCAR, NDIS }; instr_1 = { NMEM, RW, NSH, SRC, RN, SRC, NCAR, NDIS }; clockem; // x: OUT -> IN1 (RES_MAN_x) // 0: RS -> OUT (RES_MAN_1) // 1: RN -> OUT (RES_MAN_0) instr_0 = { OIN1, RS, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = { OIN1, RN, NSH, ZEROS, R1, SRC, NCAR, NDIS }; clockem; // x: OUT -> IN2 (RES_MAN_!x) instr_0 = { OIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (RES_MAN_x) + IN2 (RES_MAN_!x) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 + IN2 + RR -> RE (RES_MAN = RES_MAN_x + RES_MAN_!x) instr_0 = { NMEM, RR, NSH, SUM12S, RE, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; $display("3. Reduce: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 4. Normalize and Encode //// Description: //// If the mantissa is 1 bit too long, //// increment the exponent, and //// truncate the mantissa by 1 bit; //// determine if the exponent is greater than the maximum; //// encode the exponent, mantissa, and sign, and //// handle zero, overflow, and underflow. 
//// Actions:
////   if RES_MAN[25] then
////     RES_EXP = RES_EXP + 1
////     RES_MAN = RES_MAN >> 1 w/0
////   excess(RES_EXP) = RES_EXP inverse_masked_by ANDEXP
////   if OP1 == 0 || OP2 == 0 || RES_EXP < 0 then RESULT = 0
////   else RESULT <> 0 so
////     if excess(RES_EXP) == 0 then 0 <= |RESULT| <= maximum so
////       |RESULT| = (RES_EXP << 24) | RES_MAN
////     else excess(RES_EXP) <> 0 so |RESULT| > maximum so
////       |RESULT| = maximum
////   RESULT = |RESULT| >> 1 w/sign
////   store RESULT
////

// x: R3 -> OUT (RES_EXP)
instr_0 = { NMEM, R3, NSH, SRC, R3, SRC, NCAR, NDIS };
instr_1 = instr_0;
clockem;

// x: OUT -> IN2 (RES_EXP)
// x: RE << 7 -> RE (RES_MAN << 7)
// (the 25th mantissa bit, if present, is moved into the m.s.b. of RE)
e_in_0 = 0; e_in_1 = 0;
instr_0 = { OIN2, RE, `SH 7, SRC, RE, ZEROS, NCAR, NDIS };
instr_1 = instr_0;
clockem;

// Test the m.s.b. of the destination register (RE), not of OUT:
// the instruction above uses out_res = ZEROS, so OUT is cleared and
// OUT_MSB_F is always 0 at this point; the shifted mantissa lives in
// RE, whose m.s.b. is reported by REG_MSB_F.
if (flags_0[REG_MSB_F]) // extra bit exists, so RES_MAN 1 bit too large
  begin
    // RE << 1 -> RW (RES_MAN = RES_MAN w/o leading 1)
    instr_0 = { NMEM, RE, `SH 1, SRC, RW, ZEROS, NCAR, NDIS };
    instr_1 = instr_0;
    clockem;

    // carry_word( IN2 (RES_EXP) + 1 ) -> RR
    instr_0 = { NMEM, CI2, NDIS };
    instr_1 = instr_0;
    clockem;

    // IN2 + 1 + RR -> RE, OUT (RES_EXP = RES_EXP + 1)
    instr_0 = { NMEM, RR, NSH, SUM2S, RE, SUM2S, NCAR, NDIS };
    instr_1 = instr_0;
    clockem;
  end
else // no extra bit exists, so RES_MAN is normalized
  begin
    // RE << 2 -> RW (RES_MAN = RES_MAN w/o leading 1)
    instr_0 = { NMEM, RE, `SH 2, SRC, RW, ZEROS, NCAR, NDIS };
    instr_1 = instr_0;
    clockem;

    // squander clock cycle(s) for equalization
    instr_0 = { NOOP };
    instr_1 = instr_0;
    clockem;

    // IN2 -> RE, OUT (RES_EXP)
    instr_0 = { NMEM, RE, NSH, IN2, RE, IN2, NCAR, NDIS };
    instr_1 = instr_0;
    clockem;
  end

// sign(RES_EXP) | zero_res -> zero_res
// (OUT holds RES_EXP after either branch above, so OUT_MSB_F is its sign)
// OUT -> IN2 (RES_EXP)
// RW >> 8 -> OUT (RES_MAN = RES_MAN >> 8 w/0)
zero_res = zero_res | flags_0[OUT_MSB_F];
instr_0 = { OIN2, RW, `SH 8, SRC, RW, SRC, NCAR, NDIS };
instr_1 = instr_0;
clockem;

// OUT -> IN1 (RES_MAN)
// IN2 inverse_masked_by R2 -> OUT (excess RES_EXP using ANDEXP)
instr_0 = { OIN1, R2, NSH, SRC, R2, `I
01000100, NCAR, NDIS }; instr_1 = instr_0; clockem; if (zero_res) // RESULT = 0 begin // 0 -> RW ( |RESULT| = 0) instr_0 = { NMEM, RW, NSH, ZEROS, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end else // RESULT <=> 0 if (flags_0[OUT_ZER_F]) // 0 <= |RESULT| <= max begin // (RE << 24) | IN1 -> RW ( |RESULT| = (RES_EXP << 24) | RES_MAN ) instr_0 = { NMEM, RE, `SH 24, OR1S, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end else // |RESULT| > max begin // ones -> RW ( |RESULT| = limit ) instr_0 = { NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end // sign_res -> w_in_0 // RW >> 1 -> OUT, R4 (RESULT >> 1 w/sign) w_in_0 = sign_res; w_in_1 = sign_res; instr_0 = { NMEM, RW, `SH 1, SRC, R4, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; // OUT -> RES (RESULT) instr_0 = { A_RES, OMEM, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; $display("4. Normalize and Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); $display; word = pe_0.mem[A_OP1]; $display(" OP1: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_OP2]; $display("x OP2: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_0.mem[A_RES]; $display("= RES: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); $display; end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task clockem; begin #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; form_feed; #NS dump_r_1 = 1; #NS dump_r_1 = 0; #NS dump_m_1 = 1; #NS dump_m_1 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc17-3.v 
VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:16:29 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc17-3.v" Warning! Port sizes differ in port connection (port 14) [Verilog-PCDPC] "pc17-3.v", 30: 0 Warning! Port sizes differ in port connection (port 14) [Verilog-PCDPC] "pc17-3.v", 34: 0 Highest level modules: pe0 pe pe2 pe4 pc17 Multiplication, Floating Point, 32-Bit, 2 PEs PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 1. Load and Add: 15 cycles 2. Multiply: 60 cycles 3. Reduce: 5 cycles 4. Normalize and Encode: 10 cycles Total: 90 cycles OP1: sign 0, exponent 10000000 ($80), mantissa 10000000000000000000000 x OP2: sign 0, exponent 10000001 ($81), mantissa 01000000000000000000000 = RES: sign 0, exponent 10000010 ($82), mantissa 11100000000000000000000 36 warnings 23345 simulation events CPU time: 1.4 secs to compile + 0.6 secs to link + 3.8 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:16:35 module pc18; // pc18-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 64-bit floating-point multiply with 2 PEs // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 64 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, dump_r_0, dump_m_0, e_in_0, w_in_0, zero_res, sign_res, dump_r_1, dump_m_1, e_in_1, w_in_1, digit_0, digit_1; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0, instr_1; wire w_out_0, w_out_1; wire [9:0] flags_0, flags_1; wire [`WORD_WIDTH-1:0] s0_n1, n1_s0; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, e_in_0, w_in_0, 0, , s0_n1, , w_out_0, , 0,,,,, dump_r_0, dump_m_0), pe_1 (clock, reset, instr_1, flags_1, s0_n1, 0, e_in_1, w_in_1, 0, n1_s0, , , w_out_1, , 0,,,,, dump_r_1, dump_m_1); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0", pe_1.ADDR_WIDTH = `ADDR_WIDTH, pe_1.WORD_WIDTH = `WORD_WIDTH, pe_1.MEM_LENGTH = `MEM_LENGTH, pe_1.PE_NAME = "PE_1"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... // 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... 
memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN_1 and/or // IN/2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. 
// car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_i & mb_dis_i: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation 
parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) // Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking it's p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! 
// 64-bit floating-point representation: // bits 0-51: 52 lower-order bits of 53-bit normalized unsigned mantissa // bits 52-62: 11-bit biased exponent (bias = $3ff) // bit 63: 1-bit sign of mantissa // special floating-point values: // all 1s : positive maximum (not infinity) // all 1s except sign : negative maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point multiplication program: // 1. Load and Add // 2. Multiply // 3. Reduce // 4. Normalize and Encode // register assignments: // R0 = (operand 1) operand 1 mantissa // R1 = (operand 2) // R2 = AND-mask for isolating/testing right-justified exponent // R3 = result exponent // R4 = result // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5, // address of AND-mask for isolating/testing // right-justified exponent A_BIAS = `M 6; // address of exponent bias initial begin form_feed; $display("Multiplication, Floating Point, 64-Bit, 2 PEs"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; dump_r_1 = 0; dump_m_1 = 0; // ++++----++++---- pe_0.mem[A_ANDMAN] = `W 000fffffffffffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 0010000000000000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 00000000000007ff; // exponent AND-mask pe_0.mem[A_BIAS] = `W 00000000000003ff; // exponent bias pe_1.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_1.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_1.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_1.mem[A_BIAS] = pe_0.mem[A_BIAS]; // +++----++++ pe_0.mem[A_OP1] = { 1'b 0, 11'b 10000000000, 52'b 1000000000000000000000000000000000000000000000000000 }; // 3 // ++++----++++----++++----++++----++++----++++----++++ pe_0.mem[A_OP2] = { 1'b 0, 11'b 10000000001, 52'b 
0100000000000000000000000000000000000000000000000000 }; // 5 pe_1.mem[A_OP1] = pe_0.mem[A_OP1]; pe_1.mem[A_OP2] = pe_0.mem[A_OP2]; reset_regs; //// 1. Load and Add //// Description: //// Load operands; determine zeros, signs, exponents, and mantissas; //// add exponents and correct bias; set up for multiply. //// Note corrected biased exponent sum could be negative. //// Note if zero is determined, exponent and mantissa values are //// immaterial. //// Actions: //// zero(RES) = zero(OP1) | zero(OP2) //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES) //// RES_EXP = (OP1 >> 52 & ANDEXP) + (OP2 >> 52 & ANDEXP) - BIAS //// OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN //// OP1_MAN_1 = ( (OP1 & ANDMAN) | ORMAN ) >> 26 //// OP2_MAN = (OP2 & ANDMAN) | ORMAN //// RES_MAN = 0 //// Output State: //// 0: OP1_MAN_0 -> R0 //// 1: OP1_MAN_1 -> R0 //// x: OP2_MAN -> IN2 //// x: RES_MAN_x -> OUT //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // x: OP1 -> IN1 // -: zero(OP1) -> zero_res // -: sign(OP1) -> sign_res instr_0 = { A_OP1, MIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; zero_res = flags_0[IN1_ZER_F]; sign_res = flags_0[IN1_MSB_F]; // x: IN1 -> RW (OP1) // x: ANDEXP -> IN2 instr_0 = { A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: IN1 -> R0 (OP1) // x: OP2 -> IN1 // x: zero(OP2) | zero_res -> zero_res // x: (sign(OP2) ^ sign_res) & ~zero_res -> sign_res instr_0 = { A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; zero_res = zero_res | flags_0[IN1_ZER_F]; sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res; // x: RW >> 52 & IN2 -> OUT (OP1_EXP = OP1 >> 52 & ANDEXP) // x: IN1 -> RW (OP2) w_in_0 = 0; w_in_1 = 0; instr_0 = { NMEM, RW, `SH 52, IN1, RW, AND2S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: RW >> 52 & IN2 -> OUT (OP2_EXP = OP2 >> 52 & ANDEXP) // x: IN1 -> R1 (OP2) // x: OUT -> IN1 (OP1_EXP) instr_0 = { OIN1, RW, `SH 52, IN1, R1, 
AND2S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: IN2 -> R2 (ANDEXP) // x: OUT -> IN2 (OP2_EXP) instr_0 = { OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP) // x: BIAS -> IN2 instr_0 = { A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: OUT -> IN1 (RES_EXP) instr_0 = { OIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR instr_0 = { NMEM, C1M2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS) // x: ANDMAN -> IN1 instr_0 = { A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: ORMAN -> IN2 instr_0 = { A_ORMAN, MIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN) instr_0 = { NMEM, R1, NSH, SRC, R1, `I 11101100, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: OUT -> IN2 (OP2_MAN) // 0: (R0 & IN1) | IN2 -> R0 (OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN) // 1: (R0 & IN1) | IN2 -> RW (OP1_MAN_1 = (OP1 & ANDMAN) | ORMAN) instr_0 = { OIN2, R0, NSH, `I 11101100, R0, ZEROS, NCAR, NDIS }; instr_1 = { OIN2, R0, NSH, `I 11101100, RW, ZEROS, NCAR, NDIS }; clockem; // x: 0 -> OUT (RES_MAN_x = 0) // 1: RW >> 26 -> R0 (OP1_MAN_1 = OP1_MAN_1 >> 26) instr_0 = { NMEM, NALU, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 26, SRC, R0, ZEROS, NCAR, NDIS }; clockem; $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. 
Multiply //// Description: //// For a number of times equal to 1/2 the maximum number of bits in //// a mantissa including the leading 1, //// rotate one mantissa right; //// if the msb of the rotated mantissa is 1, //// add the other mantissa to the result mantissa; //// shift the result mantissa right 1 with 0. //// Since there are an odd number of bits, perform the above sequence //// once more, but only on the processor with the larger part. //// Note the lower bits of the result mantissa are not required //// because the leading 1s of the operands guarantee the bit length //// of the result to be either the maximum or one bit less. //// Note the carry-in to the shift of the result mantissa is always 0 //// because the operands summed are many bits shorter than a word. //// Actions: //// repeat 26: //// OP1_MAN_x = OP1_MAN_x >> 1 w/lsb(OP1_MAN_x) (rotated) //// if msb(OP1_MAN_x) == 1 then RES_MAN_x = RES_MAN_x + OP2_MAN //// else msb(OP1_MAN_x) == 0 so do nothing //// RES_MAN_x = RES_MAN_x >> 1 w/0 //// OP1_MAN_1 = OP1_MAN_1 >> 1 w/lsb(OP1_MAN_1) (rotated) //// if msb(OP1_MAN_1) == 1 then RES_MAN_1 = RES_MAN_1 + OP2_MAN //// else msb(OP1_MAN_1) == 0 so do nothing //// RES_MAN_1 = RES_MAN_1 >> 1 w/0 //// Output state: //// 0: RES_MAN_0 -> RW //// 1: RES_MAN_1 -> RW //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// repeat (26) begin // x: OUT -> IN1 (RES_MAN_x) // x: R0 -> RW (OP1_MAN_x) instr_0 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: RW >> 1 w/w_out_0 -> R0 (OP1_MAN_x rotated right) // x: msb(R0) -> digit_x w_in_0 = w_out_0; w_in_1 = w_out_1; instr_0 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; digit_0 = flags_0[REG_MSB_F]; digit_1 = flags_1[REG_MSB_F]; // x: carry_word(IN1 (RES_MAN_x) + IN2 (OP2_MAN)) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; if (digit_0) // OP1_MAN_0 digit == 1, so add to PSUM // 0: IN1 
+ IN2 + RR -> RW (RES_MAN_0 = RES_MAN_0 + OP2_MAN) instr_0 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_0 digit == 0, so do not add to PSUM // 0: IN1 -> RW (RES_MAN_0) instr_0 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_1) // OP1_MAN_1 digit == 1, so add to PSUM // 1: IN1 + IN2 + RR -> RW (RES_MAN_1 = RES_MAN_1 + OP2_MAN) instr_1 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_1 digit == 0, so do not add to PSUM // 1: IN1 -> RW (RES_MAN_1) instr_1 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // x: RW >> 1 w/0 -> OUT (next RES_MAN_x = RES_MAN_x >> 1 w/0) w_in_0 = 0; w_in_1 = 0; instr_0 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; end // 0: NOOP // 1: OUT -> IN1 (RES_MAN_1) // 1: R0 -> RW (OP1_MAN_1) instr_0 = { NOOP }; instr_1 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; clockem; // 1: RW >> 1 w/w_out_0 -> R0 (OP1_MAN_1 rotated right) // 1: msb(R0) -> digit_1 w_in_1 = w_out_1; instr_1 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; clockem; digit_1 = flags_1[REG_MSB_F]; // 1: carry_word(IN1 (RES_MAN_1) + IN2 (OP2_MAN)) -> RR instr_1 = { NMEM, C1P2, NDIS }; clockem; if (digit_1) // 1: OP1_MAN_1 digit == 1, so add to PSUM // 1: IN1 + IN2 + RR -> RW (RES_MAN_1 = RES_MAN_1 + OP2_MAN) instr_1 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // 1: OP1_MAN_1 digit == 0, so do not add to PSUM // 1: IN1 -> RW (RES_MAN_1) instr_1 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // 1: RW >> 1 w/0 -> OUT (next RES_MAN_1 = RES_MAN_1 >> 1 w/0) w_in_1 = 0; instr_1 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; clockem; $display("2. Multiply: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Reduce //// Description: //// Reduce the result mantissa from the parallel components. 
//// Actions: //// RES_MAN = (RES_MAN_0 >> 27) + RES_MAN_1 //// Output state: //// x: RES_MAN -> RE //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // 0: RW >> 27 -> RS, OUT (RES_MAN_0 = RES_MAN_0 >> 27) // 1: RW -> RN, OUT (RES_MAN_1) instr_0 = { NMEM, RW, `SH 27, SRC, RS, SRC, NCAR, NDIS }; instr_1 = { NMEM, RW, NSH, SRC, RN, SRC, NCAR, NDIS }; clockem; // x: OUT -> IN1 (RES_MAN_x) // 0: RS -> OUT (RES_MAN_1) // 1: RN -> OUT (RES_MAN_0) instr_0 = { OIN1, RS, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = { OIN1, RN, NSH, ZEROS, R1, SRC, NCAR, NDIS }; clockem; // x: OUT -> IN2 (RES_MAN_!x) instr_0 = { OIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; // x: carry_word( IN1 (RES_MAN_x) + IN2 (RES_MAN_!x) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; clockem; // x: IN1 + IN2 + RR -> RE (RES_MAN = RES_MAN_x + RES_MAN_!x) instr_0 = { NMEM, RR, NSH, SUM12S, RE, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; $display("3. Reduce: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 4. Normalize and Encode //// Description: //// If the mantissa is 1 bit too long, //// increment the exponent, and //// truncate the mantissa by 1 bit; //// determine if the exponent is greater than the maximum; //// encode the exponent, mantissa, and sign, and //// handle zero, overflow, and underflow. 
//// Actions: //// if RES_MAN[54] then //// RES_EXP = RES_EXP + 1 //// RES_MAN = RES_MAN >> 1 w/0 //// excess(RES_EXP) = RES_EXP inverse_masked_by ANDEXP //// if OP1 == 0 || OP2 == 0 || RES_EXP < 0 then RESULT = 0 //// else RESULT <> 0 so //// if excess(RES_EXP) == 0 then 0 <= |RESULT| <= maximum so //// |RESULT| = (RES_EXP << 53) | RES_MAN //// else excess(RES_EXP) <> 0 so |RESULT| > maximum so //// |RESULT| = maximum //// RESULT = |RESULT| >> 1 w/sign //// store RESULT //// // R3 -> OUT (RES_EXP) instr_0 = { NMEM, R3, NSH, SRC, R3, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; // OUT -> IN2 (RES_EXP) // RE << 10 -> RE (RES_MAN << 10) e_in_0 = 0; e_in_1 = 0; instr_0 = { OIN2, RE, `SH 10, SRC, RE, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; if (flags_0[OUT_MSB_F]) // extra bit exists, so RES_MAN 1 bit too large begin // RE << 1 -> RW (RES_MAN = RES_MAN w/o leading 1) instr_0 = { NMEM, RE, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // carry_word( IN2 (RES_EXP) + 1 ) -> RR instr_0 = { NMEM, CI2, NDIS }; instr_1 = instr_0; clockem; // IN2 + 1 + RR -> RE, OUT (RES_EXP = RES_EXP + 1) instr_0 = { NMEM, RR, NSH, SUM2S, RE, SUM2S, NCAR, NDIS }; instr_1 = instr_0; clockem; end else // no extra bit exists, so RES_MAN is normalized begin // RE << 2 -> RW (RES_MAN = RES_MAN w/o leading 1) instr_0 = { NMEM, RE, `SH 2, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; // squander clock cycle(s) for equalization instr_0 = { NOOP }; instr_1 = instr_0; clockem; // IN2 -> RE, OUT (RES_EXP) instr_0 = { NMEM, RE, NSH, IN2, RE, IN2, NCAR, NDIS }; instr_1 = instr_0; clockem; end // sign(RES_EXP) | zero_res -> zero_res // OUT -> IN2 (RES_EXP) // RW >> 11 -> OUT (RES_MAN = RES_MAN >> 11 w/0) zero_res = zero_res | flags_0[OUT_MSB_F]; instr_0 = { OIN2, RW, `SH 11, SRC, RW, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; // OUT -> IN1 (RES_MAN) // IN2 inverse_masked_by R2 -> OUT (excess RES_EXP using ANDEXP) instr_0 = { OIN1, R2, NSH, SRC, R2, `I 01000100, 
NCAR, NDIS }; instr_1 = instr_0; clockem; if (zero_res) // RESULT = 0 begin // 0 -> RW ( |RESULT| = 0) instr_0 = { NMEM, RW, NSH, ZEROS, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end else // RESULT <=> 0 if (flags_0[OUT_ZER_F]) // 0 <= |RESULT| <= max begin // (RE << 53) | IN1 -> RW ( |RESULT| = (RES_EXP << 53) | RES_MAN ) instr_0 = { NMEM, RE, `SH 53, OR1S, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end else // |RESULT| > max begin // ones -> RW ( |RESULT| = limit ) instr_0 = { NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; clockem; end // sign_res -> w_in_0 // RW >> 1 -> OUT, R4 (RESULT >> 1 w/sign) w_in_0 = sign_res; w_in_1 = sign_res; instr_0 = { NMEM, RW, `SH 1, SRC, R4, SRC, NCAR, NDIS }; instr_1 = instr_0; clockem; // OUT -> RES (RESULT) instr_0 = { A_RES, OMEM, NALU, NCAR, NDIS }; instr_1 = instr_0; clockem; $display("4. Normalize and Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); $display; word = pe_0.mem[A_OP1]; $display(" OP1: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); word = pe_0.mem[A_OP2]; $display("x OP2: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); word = pe_0.mem[A_RES]; $display("= RES: sign %b, exponent %b,", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:52] ); $display(" mantissa %b", word[51:0] ); $display; end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task clockem; begin #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; form_feed; #NS dump_r_1 = 1; #NS dump_r_1 = 0; #NS dump_m_1 = 1; #NS dump_m_1 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc18-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:16:41 * Copyright Cadence Design Systems, Inc. 
1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc18-3.v" Highest level modules: pe0 pe pe2 pe4 pc18 Multiplication, Floating Point, 64-Bit, 2 PEs PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 1. Load and Add: 15 cycles 2. Multiply: 135 cycles 3. Reduce: 5 cycles 4. Normalize and Encode: 10 cycles Total: 165 cycles OP1: sign 0, exponent 10000000000, mantissa 1000000000000000000000000000000000000000000000000000 x OP2: sign 0, exponent 10000000001, mantissa 0100000000000000000000000000000000000000000000000000 = RES: sign 0, exponent 10000000010, mantissa 1110000000000000000000000000000000000000000000000000 40 warnings 70195 simulation events CPU time: 1.4 secs to compile + 0.6 secs to link + 12.6 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:16:56 module pc19; // pc19-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 32-bit floating-point multiply with 4 PEs // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 32 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, zero_res, sign_res, dump_r_0, dump_m_0, e_in_0, w_in_0, digit_0, dump_r_1, dump_m_1, e_in_1, w_in_1, digit_1, dump_r_2, dump_m_2, e_in_2, w_in_2, digit_2, dump_r_3, dump_m_3, e_in_3, w_in_3, digit_3; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0, instr_1, instr_2, instr_3; wire w_out_0, w_out_1, w_out_2, w_out_3; wire [9:0] flags_0, flags_1, flags_2, flags_3; wire [`WORD_WIDTH-1:0] s0_n1, n1_s0, s1_n2, n2_s1, s2_n3, n3_s2; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, e_in_0, w_in_0, 0, , s0_n1, , w_out_0, , 0,,,,, dump_r_0, dump_m_0), pe_1 (clock, reset, instr_1, flags_1, s0_n1, n2_s1, e_in_1, w_in_1, 0, n1_s0, s1_n2, , w_out_1, , 0,,,,, dump_r_1, dump_m_1), pe_2 (clock, reset, instr_2, flags_2, s1_n2, n3_s2, e_in_2, w_in_2, 0, n2_s1, s2_n3, , w_out_2, , 0,,,,, dump_r_2, dump_m_2), pe_3 (clock, reset, instr_3, flags_3, s2_n3, 0, e_in_3, w_in_3, 0, n3_s2, , , w_out_3, , 0,,,,, dump_r_3, dump_m_3); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0", pe_1.ADDR_WIDTH = `ADDR_WIDTH, pe_1.WORD_WIDTH = `WORD_WIDTH, pe_1.MEM_LENGTH = `MEM_LENGTH, pe_1.PE_NAME = "PE_1", pe_2.ADDR_WIDTH = `ADDR_WIDTH, pe_2.WORD_WIDTH = `WORD_WIDTH, pe_2.MEM_LENGTH = `MEM_LENGTH, pe_2.PE_NAME = "PE_2", pe_3.ADDR_WIDTH = `ADDR_WIDTH, pe_3.WORD_WIDTH = `WORD_WIDTH, pe_3.MEM_LENGTH = `MEM_LENGTH, pe_3.PE_NAME = "PE_3"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... 
// 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN1 and/or // IN2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. 
The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. // car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_s & mb_dis_s: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. 
of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) 
// Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking its p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! // 32-bit floating-point representation: // bits 0-22: 23 lower-order bits of 24-bit normalized unsigned mantissa // bits 23-30: 8-bit biased exponent (bias = $7F) // bit 31: 1-bit sign of mantissa // special floating-point values: // all 1s : negative maximum (not infinity) // all 1s except sign : positive maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point multiplication program: // 1. Load and Add // 2. Multiply // 3. Reduce // 4. 
Normalize and Encode // register assignments: // R0 = (operand 1) operand 1 mantissa // R1 = (operand 2) // R2 = AND-mask for isolating/testing right-justified exponent // R3 = result exponent // R4 = result // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5, // address of AND-mask for isolating/testing // right-justified exponent A_BIAS = `M 6; // address of exponent bias initial begin form_feed; $display("Multiplication, Floating Point, 32-Bit, 4 PEs"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; dump_r_1 = 0; dump_m_1 = 0; dump_r_2 = 0; dump_m_2 = 0; dump_r_3 = 0; dump_m_3 = 0; pe_0.mem[A_ANDMAN] = `W 007fffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 00800000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 000000ff; // exponent AND-mask pe_0.mem[A_BIAS] = `W 0000007f; // exponent bias pe_1.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_1.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_1.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_1.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_2.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_2.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_2.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_2.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_3.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_3.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_3.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_3.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_0.mem[A_OP1] = { 1'b 0, 8'h 80, 23'b 10000000000000000000000 }; // 3 pe_0.mem[A_OP2] = { 1'b 0, 8'h 81, 23'b 01000000000000000000000 }; // 5 pe_1.mem[A_OP1] = pe_0.mem[A_OP1]; pe_1.mem[A_OP2] = pe_0.mem[A_OP2]; pe_2.mem[A_OP1] = pe_0.mem[A_OP1]; pe_2.mem[A_OP2] = pe_0.mem[A_OP2]; pe_3.mem[A_OP1] = pe_0.mem[A_OP1]; pe_3.mem[A_OP2] = pe_0.mem[A_OP2]; reset_regs; //// 1. 
Load and Add //// Description: //// Load operands; determine zeros, signs, exponents, and mantissas; //// add exponents and correct bias; set up for multiply. //// Note corrected biased exponent sum could be negative. //// Note if zero is determined, exponent and mantissa values are //// immaterial. //// Actions: //// zero(RES) = zero(OP1) | zero(OP2) //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES) //// RES_EXP = (OP1 >> 23 & ANDEXP) + (OP2 >> 23 & ANDEXP) - BIAS //// OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN //// OP1_MAN_1 = ( (OP1 & ANDMAN) | ORMAN ) >> 6 //// OP1_MAN_2 = ( (OP1 & ANDMAN) | ORMAN ) >> 12 //// OP1_MAN_3 = ( (OP1 & ANDMAN) | ORMAN ) >> 18 //// OP2_MAN = (OP2 & ANDMAN) | ORMAN //// RES_MAN_x = 0 //// Output State: //// 0: OP1_MAN_0 -> R0 //// 1: OP1_MAN_1 -> R0 //// 2: OP1_MAN_2 -> R0 //// 3: OP1_MAN_3 -> R0 //// x: OP2_MAN -> IN2 //// x: RES_MAN_x -> OUT //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // x: OP1 -> IN1 // -: zero(OP1) -> zero_res // -: sign(OP1) -> sign_res instr_0 = { A_OP1, MIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; zero_res = flags_0[IN1_ZER_F]; sign_res = flags_0[IN1_MSB_F]; // x: IN1 -> RW (OP1) // x: ANDEXP -> IN2 instr_0 = { A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 -> R0 (OP1) // x: OP2 -> IN1 // x: zero(OP2) | zero_res -> zero_res // x: (sign(OP2) ^ sign_res) & ~zero_res -> sign_res instr_0 = { A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; zero_res = zero_res | flags_0[IN1_ZER_F]; sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res; // x: RW >> 23 & IN2 -> OUT (OP1_EXP = OP1 >> 23 & ANDEXP) // x: IN1 -> RW (OP2) w_in_0 = 0; w_in_1 = 0; w_in_2 = 0; w_in_3 = 0; instr_0 = { NMEM, RW, `SH 23, IN1, RW, AND2S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; 
instr_3 = instr_0; clockem; // x: RW >> 23 & IN2 -> OUT (OP2_EXP = OP2 >> 23 & ANDEXP) // x: IN1 -> R1 (OP2) // x: OUT -> IN1 (OP1_EXP) instr_0 = { OIN1, RW, `SH 23, IN1, R1, AND2S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN2 -> R2 (ANDEXP) // x: OUT -> IN2 (OP2_EXP) instr_0 = { OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP) // x: BIAS -> IN2 instr_0 = { A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: OUT -> IN1 (RES_EXP) instr_0 = { OIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR instr_0 = { NMEM, C1M2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS) // x: ANDMAN -> IN1 instr_0 = { A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: ORMAN -> IN2 instr_0 = { A_ORMAN, MIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN) instr_0 = { NMEM, R1, NSH, SRC, R1, `I 11101100, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: OUT -> IN2 (OP2_MAN) // x: (R0 & IN1) | IN2 -> RW (OP1_MAN_x = (OP1 & ANDMAN) | ORMAN) instr_0 = { OIN2, R0, NSH, `I 11101100, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: 0 -> OUT (RES_MAN_x = 0) // 0: RW -> R0 (OP1_MAN_0 = OP1_MAN) // 1: RW >> 6 -> R0 (OP1_MAN_1 = OP1_MAN >> 6) // 2: RW >> 12 -> R0 (OP1_MAN_2 = OP1_MAN 
>> 12) // 3: RW >> 18 -> R0 (OP1_MAN_3 = OP1_MAN >> 18) instr_0 = { NMEM, RW, NSH, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 6, SRC, R0, ZEROS, NCAR, NDIS }; instr_2 = { NMEM, RW, `SH 12, SRC, R0, ZEROS, NCAR, NDIS }; instr_3 = { NMEM, RW, `SH 18, SRC, R0, ZEROS, NCAR, NDIS }; clockem; $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Multiply //// Description: //// For a number of times equal to 1/4 the maximum number of bits in //// a mantissa including the leading 1, //// rotate one mantissa right; //// if the msb of the rotated mantissa is 1, //// add the other mantissa to the result mantissa; //// shift the result mantissa right 1 with 0. //// Note the lower bits of the result mantissa are not required //// because the leading 1s of the operands guarantee the bit length //// of the result to be either the maximum or one bit less. //// Note the carry-in to the shift of the result mantissa is always 0 //// because the operands summed are many bits shorter than a word. 
//// Actions: //// repeat 6: //// OP1_MAN_x = OP1_MAN_x >> 1 w/lsb(OP1_MAN_x) (rotated) //// if msb(OP1_MAN_x) == 1 then RES_MAN_x = RES_MAN_x + OP2_MAN //// else msb(OP1_MAN_x) == 0 so do nothing //// RES_MAN_x = RES_MAN_x >> 1 w/0 //// Output state: //// 0: RES_MAN_0 -> RW //// 1: RES_MAN_1 -> RW //// 2: RES_MAN_2 -> RW //// 3: RES_MAN_3 -> RW //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// repeat (6) begin // x: OUT -> IN1 (RES_MAN_x) // x: R0 -> RW (OP1_MAN_x) instr_0 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: RW >> 1 w/w_out_0 -> R0 (OP1_MAN_x rotated right) // x: msb(R0) -> digit_x w_in_0 = w_out_0; w_in_1 = w_out_1; w_in_2 = w_out_2; w_in_3 = w_out_3; instr_0 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; digit_0 = flags_0[REG_MSB_F]; digit_1 = flags_1[REG_MSB_F]; digit_2 = flags_2[REG_MSB_F]; digit_3 = flags_3[REG_MSB_F]; // x: carry_word(IN1 (RES_MAN_x) + IN2 (OP2_MAN)) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; if (digit_0) // OP1_MAN_0 digit == 1, so add to PSUM // 0: IN1 + IN2 + RR -> RW (RES_MAN_0 = RES_MAN_0 + OP2_MAN) instr_0 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_0 digit == 0, so do not add to PSUM // 0: IN1 -> RW (RES_MAN_0) instr_0 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_1) // OP1_MAN_1 digit == 1, so add to PSUM // 1: IN1 + IN2 + RR -> RW (RES_MAN_1 = RES_MAN_1 + OP2_MAN) instr_1 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_1 digit == 0, so do not add to PSUM // 1: IN1 -> RW (RES_MAN_1) instr_1 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_2) // OP1_MAN_2 digit == 1, so add to PSUM // 2: IN1 + IN2 + RR -> RW (RES_MAN_2 = RES_MAN_2 + OP2_MAN) instr_2 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; 
else // OP1_MAN_2 digit == 0, so do not add to PSUM // 2: IN1 -> RW (RES_MAN_2) instr_2 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_3) // OP1_MAN_3 digit == 1, so add to PSUM // 3: IN1 + IN2 + RR -> RW (RES_MAN_3 = RES_MAN_3 + OP2_MAN) instr_3 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_3 digit == 0, so do not add to PSUM // 3: IN1 -> RW (RES_MAN_3) instr_3 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // x: RW >> 1 w/0 -> OUT (next RES_MAN_x = RES_MAN_x >> 1 w/0) w_in_0 = 0; w_in_1 = 0; w_in_2 = 0; w_in_3 = 0; instr_0 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; end $display("2. Multiply: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. Reduce //// Description: //// Reduce the result mantissa from the parallel components. //// Actions: //// RES_MAN = (RES_MAN_0 >> 18) + (RES_MAN_1 >> 12) //// + (RES_MAN_2 >> 6) + RES_MAN_3 //// Output state: //// 1: RES_MAN -> RE //// 2: RES_MAN -> RE //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // 0: RW >> 18 -> RS (RES_MAN_0 = RES_MAN_0 >> 18) // 1: RW >> 12 -> OUT (RES_MAN_1 = RES_MAN_1 >> 12) // 2: RW >> 6 -> OUT (RES_MAN_2 = RES_MAN_2 >> 6) // 3: RW -> RN (RES_MAN_3) instr_0 = { NMEM, RW, `SH 18, SRC, RS, SRC, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 12, SRC, R1, SRC, NCAR, NDIS }; instr_2 = { NMEM, RW, `SH 6, SRC, R1, SRC, NCAR, NDIS }; instr_3 = { NMEM, RW, NSH, SRC, RN, SRC, NCAR, NDIS }; clockem; // Note: only use PE_1 and PE_2 from here on. 
// x: OUT -> IN1 (RES_MAN_x) // 0: NOOP // 1: RN -> OUT (RES_MAN_0) // 2: RS -> OUT (RES_MAN_3) // 3: NOOP instr_0 = { NOOP }; instr_1 = { OIN1, RN, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_2 = { OIN1, RS, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_3 = { NOOP }; clockem; // 1: OUT -> IN2 (RES_MAN_0) // 2: OUT -> IN2 (RES_MAN_3) instr_1 = { OIN2, NALU, NCAR, NDIS }; instr_2 = instr_1; clockem; // 1: carry_word( IN1 (RES_MAN_1) + IN2 (RES_MAN_0) ) -> RR // 2: carry_word( IN1 (RES_MAN_2) + IN2 (RES_MAN_3) ) -> RR instr_1 = { NMEM, C1P2, NDIS }; instr_2 = instr_1; clockem; // 1: IN1 + IN2 + RR -> RS, OUT (RES_MAN_1 = RES_MAN_1 + RES_MAN_0) // 2: IN1 + IN2 + RR -> RN, OUT (RES_MAN_2 = RES_MAN_2 + RES_MAN_3) instr_1 = { NMEM, RR, NSH, SUM12S, RS, SUM12S, NCAR, NDIS }; instr_2 = { NMEM, RR, NSH, SUM12S, RN, SUM12S, NCAR, NDIS }; clockem; // Note: from here on, PE_1 and PE_2 are functionally identical // x: OUT -> IN1 (RES_MAN_x) // 1: RS -> OUT (RES_MAN_2) // 2: RN -> OUT (RES_MAN_1) instr_1 = { OIN1, RS, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_2 = { OIN1, RN, NSH, ZEROS, R1, SRC, NCAR, NDIS }; clockem; // 1: OUT -> IN2 (RES_MAN_2) // 2: OUT -> IN2 (RES_MAN_1) instr_1 = { OIN2, NALU, NCAR, NDIS }; instr_2 = instr_1; clockem; // 1: carry_word( IN1 (RES_MAN_1) + IN2 (RES_MAN_2) ) -> RR // 2: carry_word( IN1 (RES_MAN_2) + IN2 (RES_MAN_1) ) -> RR instr_1 = { NMEM, C1P2, NDIS }; instr_2 = instr_1; clockem; // 1: IN1 + IN2 + RR -> RE (RES_MAN = RES_MAN_1 + RES_MAN_2) // 2: IN1 + IN2 + RR -> RE (RES_MAN = RES_MAN_2 + RES_MAN_1) instr_1 = { NMEM, RR, NSH, SUM12S, RE, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; $display("3. Reduce: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 4. 
Normalize and Encode //// Description: //// If the mantissa is 1 bit too long, //// increment the exponent, and //// truncate the mantissa by 1 bit; //// determine if the exponent is greater than the maximum; //// encode the exponent, mantissa, and sign, and //// handle zero, overflow, and underflow. //// Actions: //// if RES_MAN[25] then //// RES_EXP = RES_EXP + 1 //// RES_MAN = RES_MAN >> 1 w/0 //// excess(RES_EXP) = RES_EXP inverse_masked_by ANDEXP //// if OP1 == 0 || OP2 == 0 || RES_EXP < 0 then RESULT = 0 //// else RESULT <> 0 so //// if excess(RES_EXP) == 0 then 0 <= |RESULT| <= maximum so //// |RESULT| = (RES_EXP << 24) | RES_MAN //// else excess(RES_EXP) <> 0 so |RESULT| > maximum so //// |RESULT| = maximum //// RESULT = |RESULT| >> 1 w/sign //// store RESULT //// // R3 -> OUT (RES_EXP) instr_1 = { NMEM, R3, NSH, SRC, R3, SRC, NCAR, NDIS }; instr_2 = instr_1; clockem; // OUT -> IN2 (RES_EXP) // RE << 7 -> RE (RES_MAN << 7) e_in_1 = 0; e_in_2 = 0; instr_1 = { OIN2, RE, `SH 7, SRC, RE, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; if (flags_1[OUT_MSB_F]) // extra bit exists, so RES_MAN 1 bit too large begin // RE << 1 -> RW (RES_MAN = RES_MAN w/o leading 1) instr_1 = { NMEM, RE, `SH 1, SRC, RW, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; // carry_word( IN2 (RES_EXP) + 1 ) -> RR instr_1 = { NMEM, CI2, NDIS }; instr_2 = instr_1; clockem; // IN2 + 1 + RR -> RE, OUT (RES_EXP = RES_EXP + 1) instr_1 = { NMEM, RR, NSH, SUM2S, RE, SUM2S, NCAR, NDIS }; instr_2 = instr_1; clockem; end else // no extra bit exists, so RES_MAN is normalized begin // RE << 2 -> RW (RES_MAN = RES_MAN w/o leading 1) instr_1 = { NMEM, RE, `SH 2, SRC, RW, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; // squander clock cycle(s) for equalization instr_1 = { NOOP }; instr_2 = instr_1; clockem; // IN2 -> RE, OUT (RES_EXP) instr_1 = { NMEM, RE, NSH, IN2, RE, IN2, NCAR, NDIS }; instr_2 = instr_1; clockem; end // sign(RES_EXP) | zero_res -> zero_res // OUT -> IN2 (RES_EXP) // RW >> 8 -> 
OUT (RES_MAN = RES_MAN >> 8 w/0) zero_res = zero_res | flags_1[OUT_MSB_F]; instr_1 = { OIN2, RW, `SH 8, SRC, RW, SRC, NCAR, NDIS }; instr_2 = instr_1; clockem; // OUT -> IN1 (RES_MAN) // IN2 inverse_masked_by R2 -> OUT (excess RES_EXP using ANDEXP) instr_1 = { OIN1, R2, NSH, SRC, R2, `I 01000100, NCAR, NDIS }; instr_2 = instr_1; clockem; if (zero_res) // RESULT = 0 begin // 0 -> RW ( |RESULT| = 0) instr_1 = { NMEM, RW, NSH, ZEROS, RW, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; end else // RESULT <=> 0 if (flags_1[OUT_ZER_F]) // 0 <= |RESULT| <= max begin // (RE << 24) | IN1 -> RW ( |RESULT| = (RES_EXP << 24) | RES_MAN ) instr_1 = { NMEM, RE, `SH 24, OR1S, RW, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; end else // |RESULT| > max begin // ones -> RW ( |RESULT| = limit ) instr_1 = { NMEM, RW, NSH, ONES, RW, ZEROS, NCAR, NDIS }; instr_2 = instr_1; clockem; end // sign_res -> w_in_1 // RW >> 1 -> OUT, R4 (RESULT >> 1 w/sign) w_in_1 = sign_res; w_in_2 = sign_res; instr_1 = { NMEM, RW, `SH 1, SRC, R4, SRC, NCAR, NDIS }; instr_2 = instr_1; clockem; // OUT -> RES (RESULT) instr_1 = { A_RES, OMEM, NALU, NCAR, NDIS }; instr_2 = instr_1; clockem; $display("4. 
Normalize and Encode: %d cycles", clock_ct - old_clock_ct); $display(" Total: %d cycles", clock_ct); $display; word = pe_1.mem[A_OP1]; $display(" OP1: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_1.mem[A_OP2]; $display("x OP2: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); word = pe_1.mem[A_RES]; $display("= RES: sign %b, exponent %b ($%x),", word[`WORD_WIDTH-1], word[`WORD_WIDTH-2:23], word[`WORD_WIDTH-2:23] ); $display(" mantissa %b", word[22:0] ); $display; end task reset_regs; begin #NS reset = 1; #NS reset = 0; end endtask task clockem; begin #NS clock = 1; #NS clock = 0; clock_ct = clock_ct + 1; end endtask task dump; begin #NS dump_r_0 = 1; #NS dump_r_0 = 0; #NS dump_m_0 = 1; #NS dump_m_0 = 0; form_feed; #NS dump_r_1 = 1; #NS dump_r_1 = 0; #NS dump_m_1 = 1; #NS dump_m_1 = 0; form_feed; #NS dump_r_2 = 1; #NS dump_r_2 = 0; #NS dump_m_2 = 1; #NS dump_m_2 = 0; form_feed; #NS dump_r_3 = 1; #NS dump_r_3 = 0; #NS dump_m_3 = 1; #NS dump_m_3 = 0; end endtask task form_feed; $write("\14"); endtask endmodule Host command: verilog Command arguments: pe.v pc19-3.v VERILOG-XL 1.6a.4 log file created Jul 14, 1994 19:17:02 * Copyright Cadence Design Systems, Inc. 1985, 1988. * * All Rights Reserved. Licensed Software. * * Confidential and proprietary information which is the * * property of Cadence Design Systems, Inc. * Compiling source file "pe.v" Compiling source file "pc19-3.v" Highest level modules: pe0 pe pe2 pe4 pc19 Multiplication, Floating Point, 32-Bit, 4 PEs PE "PE_3" Reset, Clock Cycle # 0 PE "PE_2" Reset, Clock Cycle # 0 PE "PE_1" Reset, Clock Cycle # 0 PE "PE_0" Reset, Clock Cycle # 0 1. Load and Add: 15 cycles 2. Multiply: 30 cycles 3. Reduce: 9 cycles 4. 
Normalize and Encode: 10 cycles Total: 64 cycles OP1: sign 0, exponent 10000000 ($80), mantissa 10000000000000000000000 x OP2: sign 0, exponent 10000001 ($81), mantissa 01000000000000000000000 = RES: sign 0, exponent 10000010 ($82), mantissa 11100000000000000000000 38 warnings 32905 simulation events CPU time: 1.5 secs to compile + 0.8 secs to link + 5.5 secs in simulation End of VERILOG-XL 1.6a.4 Jul 14, 1994 19:17:10 module pc20; // pc20-3.v // a Verilog-XL behavioral model of a reconfigurable processor configuration // for a 64-bit floating-point multiply with 4 PEs // (using complementation and variable-bit-shift EAST & WEST registers) // (highest level module; requires module pe3 in file pe.v) // for Dr. W. B. Ligon, E&CE dept., Clemson U., 1992-4 // by Ken Winiecki // system parameters: `define WORD_WIDTH 64 // width of PE word, in bits `define ADDR_WIDTH 4 // width of PE memory address, in bits `define MEM_LENGTH 16 // length of PE memory, in words `define MHZ 16.7 // clock speed, in "megahertz" // "`define"s are used to circumnavigate a Verilog-XL bug that // prevents "parameter"s from working as bit length specifiers!!! 
// variable declarations: reg clock, reset, zero_res, sign_res, dump_r_0, dump_m_0, e_in_0, w_in_0, digit_0, dump_r_1, dump_m_1, e_in_1, w_in_1, digit_1, dump_r_2, dump_m_2, e_in_2, w_in_2, digit_2, dump_r_3, dump_m_3, e_in_3, w_in_3, digit_3; reg [`WORD_WIDTH-1:0] word; reg [`ADDR_WIDTH+44-1:0] instr_0, instr_1, instr_2, instr_3; wire w_out_0, w_out_1, w_out_2, w_out_3; wire [9:0] flags_0, flags_1, flags_2, flags_3; wire [`WORD_WIDTH-1:0] s0_n1, n1_s0, s1_n2, n2_s1, s2_n3, n3_s2; integer clock_ct, old_clock_ct; // PE instances and connections: pe3 pe_0 (clock, reset, instr_0, flags_0, 0, n1_s0, e_in_0, w_in_0, 0, , s0_n1, , w_out_0, , 0,,,,, dump_r_0, dump_m_0), pe_1 (clock, reset, instr_1, flags_1, s0_n1, n2_s1, e_in_1, w_in_1, 0, n1_s0, s1_n2, , w_out_1, , 0,,,,, dump_r_1, dump_m_1), pe_2 (clock, reset, instr_2, flags_2, s1_n2, n3_s2, e_in_2, w_in_2, 0, n2_s1, s2_n3, , w_out_2, , 0,,,,, dump_r_2, dump_m_2), pe_3 (clock, reset, instr_3, flags_3, s2_n3, 0, e_in_3, w_in_3, 0, n3_s2, , , w_out_3, , 0,,,,, dump_r_3, dump_m_3); defparam // set PE-instance parameters to system parameters: pe_0.ADDR_WIDTH = `ADDR_WIDTH, pe_0.WORD_WIDTH = `WORD_WIDTH, pe_0.MEM_LENGTH = `MEM_LENGTH, pe_0.PE_NAME = "PE_0", pe_1.ADDR_WIDTH = `ADDR_WIDTH, pe_1.WORD_WIDTH = `WORD_WIDTH, pe_1.MEM_LENGTH = `MEM_LENGTH, pe_1.PE_NAME = "PE_1", pe_2.ADDR_WIDTH = `ADDR_WIDTH, pe_2.WORD_WIDTH = `WORD_WIDTH, pe_2.MEM_LENGTH = `MEM_LENGTH, pe_2.PE_NAME = "PE_2", pe_3.ADDR_WIDTH = `ADDR_WIDTH, pe_3.WORD_WIDTH = `WORD_WIDTH, pe_3.MEM_LENGTH = `MEM_LENGTH, pe_3.PE_NAME = "PE_3"; // PE instruction fields and bit widths: // // mb_addr mb_srce mb_d_mem mb_d_in2 mb_d_in1 ... // (`ADDR_WIDTH) 1 1 1 1 // // src_reg shift reg_res dest_reg out_res ... // 4 6 8 4 8 // // car_in1 car_in2 car_nin1 car_nin2 car_srce car_val ... 
// 1 1 1 1 1 1 // // alu_dis alu_dis_s mb_dis mb_dis_s // 1 1 1 1 parameter // PE instruction field descriptions and values: // mb_addr: memory bus transfer address // mb_srce: source of memory bus transfer is ... OUT = 1'b0, // ... register "OUT" MEM = 1'b1, // ... memory // mb_d_mem & mb_d_in2 & mb_d_in1: destinations of memory bus transfers F = 1'b0, // false T = 1'b1, // true // srce_reg & dest_reg: destination and source registers of ALU operations R0 = 4'b0000, R1 = 4'b0001, R2 = 4'b0010, R3 = 4'b0011, R4 = 4'b0100, R5 = 4'b0101, R6 = 4'b0110, R7 = 4'b0111, R8 = 4'b1000, R9 = 4'b1001, RD = 4'b1010, RR = 4'b1011, RN = 4'b1100, RS = 4'b1101, RE = 4'b1110, RW = 4'b1111, // shift: shift east or west source register "shift" bits before using // reg_res & out_res: ALU operations for "OUT" and destination reg results IN1 = 8'b11110000, IN2 = 8'b11001100, SRC = 8'b10101010, NIN1 = 8'b00001111, NIN2 = 8'b00110011, NSRC = 8'b01010101, ZEROS = 8'b00000000, ONES = 8'b11111111, AND12 = 8'b11000000, OR12 = 8'b11111100, XOR12 = 8'b00111100, NAND12 = 8'b00111111, NOR12 = 8'b00000011, NXOR12 = 8'b11000011, AND1S = 8'b10100000, OR1S = 8'b11111010, XOR1S = 8'b01011010, NAND1S = 8'b01011111, NOR1S = 8'b00000101, NXOR1S = 8'b10100101, SUMN1S = NXOR1S, SUM1S = XOR1S, AND2S = 8'b10001000, OR2S = 8'b11101110, XOR2S = 8'b01100110, NAND2S = 8'b01110111, NOR2S = 8'b00010001, NXOR2S = 8'b10011001, SUMN2S = NXOR2S, SUM2S = XOR2S, AND12S = 8'b10000000, OR12S = 8'b11111110, XOR12S = 8'b01111110, NAND12S = 8'b01111111, NOR12S = 8'b00000001, NXOR12S = 8'b10000001, SUM12S = 8'b10010110, SUM1N2S = 8'b01101001, SUM2N1S = SUM1N2S; // Note: The PE only does EITHER an ALU OR a carry operation in one // instruction cycle. A carry word is computed using registers IN1 and/or // IN2 and a carry-in bit, and a carry operation is implied by specifying // the registers to use. 
The carry word is made available at the input // from the router, r_in, and the carry-out is placed in the carry flag of // the p_flags register. // car_in1: use "IN1" in carry word computation; T or F // car_in2: use "IN2" in carry word computation; T or F // car_nin1: use inverse of "IN1" (if use was specified); T or F // car_nin2: use inverse of "IN2" (if use was specified); T or F // car_srce: use value for carry-in (or else use carry flag); T or F // car_val: use 1 for value of carry-in (if use was specified); T or F // alu_dis & mb_dis: allow disabling of ALU/memory bus operation; T or F // alu_dis_s & mb_dis_s: invert PE disable bit for ALU/memory bus op; T or F parameter // bit positions of PE flags: // Note that flags other than carry are not affected by a carry operation, // and the carry flag is not affected by an ALU operation. REG_MSB_F = 9, // m.s.b. of destination register REG_ZER_F = 8, // if destination register = 0 OUT_MSB_F = 7, // m.s.b. of register OUT OUT_ZER_F = 6, // if register OUT = 0 IN2_MSB_F = 5, // m.s.b. of register IN2 IN2_ZER_F = 4, // if register IN2 = 0 IN1_MSB_F = 3, // m.s.b. 
of register IN1 IN1_ZER_F = 2, // if register IN1 = 0 DISABLE_F = 1, // state of PE disable bit CARRY_F = 0; // carry-out of last carry operation // convenience parameters and definitions for writing PE instructions: parameter NS = 500/`MHZ; // when specifying clock timing `define W `WORD_WIDTH'h // for specifying memory values (in hex) `define M `ADDR_WIDTH'h // for specifying memory address (in hex) parameter MIN1 = { MEM, F, F, T }; // when moving memory only to "IN1" parameter MIN2 = { MEM, F, T, F }; // when moving memory only to "IN2" parameter OMEM = { OUT, T, F, F }; // when moving "OUT" only to memory parameter OIN1 = { `M 0, OUT, F, F, T }; // when moving "OUT" only to "IN1" parameter OIN2 = { `M 0, OUT, F, T, F }; // when moving "OUT" only to "IN2" parameter NMEM = { `M 0, OUT, F, F, F }; // when not using memory bus `define SH 6'd // for specifying shift (in decimal) parameter NSH = `SH 0; // when not shifting (not using east/west srce regs) `define I 8'b // for specifying ALU operation parameter NALU = { R0, NSH, SRC, R0, ZEROS }; // when not doing ALU ops // (note that flags are affected and OUT is cleared) parameter NCAR = { F, F, F, F, F, F }; // when not doing carry operations parameter C1P2 = { NALU, T, T, F, F, T, F }; // for carry for "IN1" + "IN2" parameter C1M2 = { NALU, T, T, F, T, T, T }; // for carry for "IN1" - "IN2" parameter C2M1 = { NALU, T, T, T, F, T, T }; // for carry for "IN2" - "IN1" parameter CM1 = { NALU, T, F, T, F, T, T }; // for carry to negate "IN1" parameter CM2 = { NALU, F, T, F, T, T, T }; // for carry to negate "IN2" parameter CI1 = { NALU, T, F, F, F, T, T }; // for carry to increment "IN1" parameter CI2 = { NALU, F, T, F, F, T, T }; // for carry to increment "IN2" parameter C0 = { NALU, T, F, F, F, T, F }; // for carry = 0 parameter NDIS = { F, F, F, F }; // when not allowing any disabling parameter NOOP = { NMEM, NALU, NCAR, NDIS }; // when squandering clock cycles // (note that flags are affected and OUT is cleared) 
// Write a program in Verilog-XL to control the processing elements directly, // and put it in the "initial" block to execute. Set up the PE memory. // Reset the PE by clocking its p_reset line. (Clock a line using #NS to // delay each transition.) Build a PE instruction using the above defined // instruction fields and value parameters. Clock the PEs. Monitor the // flags of a PE using the above bit position parameters. Note: Verilog-XL // has no way of dealing realistically with "don't cares," so never allow the // use of "don't knows" (undefined values)!!! // 64-bit floating-point representation: // bits 0-51: 52 lower-order bits of 53-bit normalized unsigned mantissa // bits 52-62: 11-bit biased exponent (bias = $3ff) // bit 63: 1-bit sign of mantissa // special floating-point values: // all 1s : negative maximum (not infinity) // all 1s except sign : positive maximum (not infinity) // all zeros : zero // all zeros except sign : negative minimum (not zero) // floating-point multiplication program: // 1. Load and Add // 2. Multiply // 3. Reduce // 4. 
Normalize and Encode // register assignments: // R0 = (operand 1) operand 1 mantissa // R1 = (operand 2) // R2 = AND-mask for isolating/testing right-justified exponent // R3 = result exponent // R4 = result // memory address definitions: parameter A_OP1 = `M 0, // address of first operand A_OP2 = `M 1, // address of second operand A_RES = `M 2, // address of result A_ANDMAN = `M 3, // address of AND-mask for isolating mantissa A_ORMAN = `M 4, // address of OR-mask for replacing leading 1 of // mantissa A_ANDEXP = `M 5, // address of AND-mask for isolating/testing // right-justified exponent A_BIAS = `M 6; // address of exponent bias initial begin form_feed; $display("Multiplication, Floating Point, 64-Bit, 4 PEs"); clock = 0; clock_ct = 0; old_clock_ct = 0; dump_r_0 = 0; dump_m_0 = 0; dump_r_1 = 0; dump_m_1 = 0; dump_r_2 = 0; dump_m_2 = 0; dump_r_3 = 0; dump_m_3 = 0; // ++++----++++---- pe_0.mem[A_ANDMAN] = `W 000fffffffffffff; // mantissa AND-mask pe_0.mem[A_ORMAN] = `W 0010000000000000; // mantissa OR-mask pe_0.mem[A_ANDEXP] = `W 00000000000007ff; // exponent AND-mask pe_0.mem[A_BIAS] = `W 00000000000003ff; // exponent bias pe_1.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_1.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_1.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_1.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_2.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_2.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_2.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_2.mem[A_BIAS] = pe_0.mem[A_BIAS]; pe_3.mem[A_ANDMAN] = pe_0.mem[A_ANDMAN]; pe_3.mem[A_ORMAN] = pe_0.mem[A_ORMAN]; pe_3.mem[A_ANDEXP] = pe_0.mem[A_ANDEXP]; pe_3.mem[A_BIAS] = pe_0.mem[A_BIAS]; // +++----++++ pe_0.mem[A_OP1] = { 1'b 0, 11'b 10000000000, 52'b 1000000000000000000000000000000000000000000000000000 }; // 3 // ++++----++++----++++----++++----++++----++++----++++ pe_0.mem[A_OP2] = { 1'b 0, 11'b 10000000001, 52'b 0100000000000000000000000000000000000000000000000000 }; // 5 pe_1.mem[A_OP1] = pe_0.mem[A_OP1]; pe_1.mem[A_OP2] = pe_0.mem[A_OP2]; pe_2.mem[A_OP1] = 
pe_0.mem[A_OP1]; pe_2.mem[A_OP2] = pe_0.mem[A_OP2]; pe_3.mem[A_OP1] = pe_0.mem[A_OP1]; pe_3.mem[A_OP2] = pe_0.mem[A_OP2]; reset_regs; //// 1. Load and Add //// Description: //// Load operands; determine zeros, signs, exponents, and mantissas; //// add exponents and correct bias; set up for multiply. //// Note corrected biased exponent sum could be negative. //// Note if zero is determined, exponent and mantissa values are //// immaterial. //// Actions: //// zero(RES) = zero(OP1) | zero(OP2) //// sign(RES) = (msb(OP1) ^ msb(OP2)) & ~zero(RES) //// RES_EXP = (OP1 >> 52 & ANDEXP) + (OP2 >> 52 & ANDEXP) - BIAS //// OP1_MAN_0 = (OP1 & ANDMAN) | ORMAN //// OP1_MAN_1 = ( (OP1 & ANDMAN) | ORMAN ) >> 13 //// OP1_MAN_2 = ( (OP1 & ANDMAN) | ORMAN ) >> 26 //// OP1_MAN_3 = ( (OP1 & ANDMAN) | ORMAN ) >> 39 //// OP2_MAN = (OP2 & ANDMAN) | ORMAN //// RES_MAN_x = 0 //// Output State: //// 0: OP1_MAN_0 -> R0 //// 1: OP1_MAN_1 -> R0 //// 2: OP1_MAN_2 -> R0 //// 3: OP1_MAN_3 -> R0 //// x: OP2_MAN -> IN2 //// x: RES_MAN_x -> OUT //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // x: OP1 -> IN1 // -: zero(OP1) -> zero_res // -: sign(OP1) -> sign_res instr_0 = { A_OP1, MIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; zero_res = flags_0[IN1_ZER_F]; sign_res = flags_0[IN1_MSB_F]; // x: IN1 -> RW (OP1) // x: ANDEXP -> IN2 instr_0 = { A_ANDEXP, MIN2, R0, NSH, IN1, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 -> R0 (OP1) // x: OP2 -> IN1 // x: zero(OP2) | zero_res -> zero_res // x: (sign(OP2) ^ sign_res) & ~zero_res -> sign_res instr_0 = { A_OP2, MIN1, R0, NSH, IN1, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; zero_res = zero_res | flags_0[IN1_ZER_F]; sign_res = (sign_res ^ flags_0[IN1_MSB_F]) & ~zero_res; // x: RW >> 52 & IN2 -> OUT (OP1_EXP = OP1 >> 52 & ANDEXP) // x: IN1 -> RW (OP2) w_in_0 
= 0; w_in_1 = 0; w_in_2 = 0; w_in_3 = 0; instr_0 = { NMEM, RW, `SH 52, IN1, RW, AND2S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: RW >> 52 & IN2 -> OUT (OP2_EXP = OP2 >> 52 & ANDEXP) // x: IN1 -> R1 (OP2) // x: OUT -> IN1 (OP1_EXP) instr_0 = { OIN1, RW, `SH 52, IN1, R1, AND2S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN2 -> R2 (ANDEXP) // x: OUT -> IN2 (OP2_EXP) instr_0 = { OIN2, R0, NSH, IN2, R2, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: carry_word( IN1 (OP1_EXP) + IN2 (OP2_EXP) ) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 + IN2 + RR -> OUT (RES_EXP = OP1_EXP + OP2_EXP) // x: BIAS -> IN2 instr_0 = { A_BIAS, MIN2, RR, NSH, SRC, RR, SUM12S, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: OUT -> IN1 (RES_EXP) instr_0 = { OIN1, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: carry_word( IN1 (RES_EXP) - IN2 (BIAS) ) -> RR instr_0 = { NMEM, C1M2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: IN1 - IN2 + RR -> R3 (RES_EXP = RES_EXP - BIAS) // x: ANDMAN -> IN1 instr_0 = { A_ANDMAN, MIN1, RR, NSH, SUM1N2S, R3, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: ORMAN -> IN2 instr_0 = { A_ORMAN, MIN2, NALU, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: (R1 & IN1) | IN2 -> OUT (OP2_MAN = (OP2 & ANDMAN) | ORMAN) instr_0 = { NMEM, R1, NSH, SRC, R1, `I 11101100, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: OUT -> IN2 (OP2_MAN) // x: (R0 & IN1) | IN2 -> RW (OP1_MAN = (OP1 & ANDMAN) | ORMAN) instr_0 = { OIN2, R0, NSH, `I 11101100, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: 0 -> OUT 
(RES_MAN_x = 0) // 0: RW -> R0 (OP1_MAN_0 = OP1_MAN) // 1: RW >> 13 -> R0 (OP1_MAN_1 = OP1_MAN >> 13) // 2: RW >> 26 -> R0 (OP1_MAN_2 = OP1_MAN >> 26) // 3: RW >> 39 -> R0 (OP1_MAN_3 = OP1_MAN >> 39) instr_0 = { NMEM, RW, NSH, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 13, SRC, R0, ZEROS, NCAR, NDIS }; instr_2 = { NMEM, RW, `SH 26, SRC, R0, ZEROS, NCAR, NDIS }; instr_3 = { NMEM, RW, `SH 39, SRC, R0, ZEROS, NCAR, NDIS }; clockem; $display("1. Load and Add: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 2. Multiply //// Description: //// For a number of times equal to 1/4 the maximum number of bits in //// a mantissa including the leading 1, //// rotate one mantissa right; //// if the msb of the rotated mantissa is 1, //// add the other mantissa to the result mantissa; //// shift the result mantissa right 1 with 0. //// Since there are an odd number of bits, perform the above sequence //// once more, but only on the processor with the larger part. //// Note the lower bits of the result mantissa are not required //// because the leading 1s of the operands guarantee the bit length //// of the result to be either the maximum or one bit less. //// Note the carry-in to the shift of the result mantissa is always 0 //// because the operands summed are many bits shorter than a word. 
//// Actions: //// repeat 13: //// OP1_MAN_x = OP1_MAN_x >> 1 w/lsb(OP1_MAN_x) (rotated) //// if msb(OP1_MAN_x) == 1 then RES_MAN_x = RES_MAN_x + OP2_MAN //// else msb(OP1_MAN_x) == 0 so do nothing //// RES_MAN_x = RES_MAN_x >> 1 w/0 //// OP1_MAN_3 = OP1_MAN_3 >> 1 w/lsb(OP1_MAN_3) (rotated) //// if msb(OP1_MAN_3) == 1 then RES_MAN_3 = RES_MAN_3 + OP2_MAN //// else msb(OP1_MAN_3) == 0 so do nothing //// RES_MAN_3 = RES_MAN_3 >> 1 w/0 //// Output state: //// 0: RES_MAN_0 -> RW //// 1: RES_MAN_1 -> RW //// 2: RES_MAN_2 -> RW //// 3: RES_MAN_3 -> RW //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// repeat (13) begin // x: OUT -> IN1 (RES_MAN_x) // x: R0 -> RW (OP1_MAN_x) instr_0 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; // x: RW >> 1 w/w_out_0 -> R0 (OP1_MAN_x rotated right) // x: msb(R0) -> digit_x w_in_0 = w_out_0; w_in_1 = w_out_1; w_in_2 = w_out_2; w_in_3 = w_out_3; instr_0 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; digit_0 = flags_0[REG_MSB_F]; digit_1 = flags_1[REG_MSB_F]; digit_2 = flags_2[REG_MSB_F]; digit_3 = flags_3[REG_MSB_F]; // x: carry_word(IN1 (RES_MAN_x) + IN2 (OP2_MAN)) -> RR instr_0 = { NMEM, C1P2, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; if (digit_0) // OP1_MAN_0 digit == 1, so add to PSUM // 0: IN1 + IN2 + RR -> RW (RES_MAN_0 = RES_MAN_0 + OP2_MAN) instr_0 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_0 digit == 0, so do not add to PSUM // 0: IN1 -> RW (RES_MAN_0) instr_0 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_1) // OP1_MAN_1 digit == 1, so add to PSUM // 1: IN1 + IN2 + RR -> RW (RES_MAN_1 = RES_MAN_1 + OP2_MAN) instr_1 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_1 digit == 0, so do not add to PSUM // 1: IN1 -> RW (RES_MAN_1) instr_1 = { NMEM, RW, 
NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_2) // OP1_MAN_2 digit == 1, so add to PSUM // 2: IN1 + IN2 + RR -> RW (RES_MAN_2 = RES_MAN_2 + OP2_MAN) instr_2 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_2 digit == 0, so do not add to PSUM // 2: IN1 -> RW (RES_MAN_2) instr_2 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; if (digit_3) // OP1_MAN_3 digit == 1, so add to PSUM // 3: IN1 + IN2 + RR -> RW (RES_MAN_3 = RES_MAN_3 + OP2_MAN) instr_3 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_3 digit == 0, so do not add to PSUM // 3: IN1 -> RW (RES_MAN_3) instr_3 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // x: RW >> 1 w/0 -> OUT (next RES_MAN_x = RES_MAN_x >> 1 w/0) w_in_0 = 0; w_in_1 = 0; w_in_2 = 0; w_in_3 = 0; instr_0 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = instr_0; clockem; end // 0: NOOP // 1: NOOP // 2: NOOP // 3: OUT -> IN1 (RES_MAN_3) // 3: R0 -> RW (OP1_MAN_3) instr_0 = { NOOP }; instr_1 = instr_0; instr_2 = instr_0; instr_3 = { OIN1, R0, NSH, SRC, RW, ZEROS, NCAR, NDIS }; clockem; // 3: RW >> 1 w/w_out_3 -> R0 (OP1_MAN_3 rotated right) // 3: msb(R0) -> digit_3 w_in_3 = w_out_3; instr_3 = { NMEM, RW, `SH 1, SRC, R0, ZEROS, NCAR, NDIS }; clockem; digit_3 = flags_3[REG_MSB_F]; // 3: carry_word(IN1 (RES_MAN_3) + IN2 (OP2_MAN)) -> RR instr_3 = { NMEM, C1P2, NDIS }; clockem; if (digit_3) // OP1_MAN_3 digit == 1, so add to PSUM // 3: IN1 + IN2 + RR -> RW (RES_MAN_3 = RES_MAN_3 + OP2_MAN) instr_3 = { NMEM, RR, NSH, SUM12S, RW, ZEROS, NCAR, NDIS }; else // OP1_MAN_3 digit == 0, so do not add to PSUM // 3: IN1 -> RW (RES_MAN_3) instr_3 = { NMEM, RW, NSH, IN1, RW, ZEROS, NCAR, NDIS }; clockem; // 3: RW >> 1 w/0 -> OUT (next RES_MAN_3 = RES_MAN_3 >> 1 w/0) w_in_3 = 0; instr_3 = { NMEM, RW, `SH 1, ZEROS, R1, SRC, NCAR, NDIS }; clockem; $display("2. Multiply: %d cycles", clock_ct - old_clock_ct); old_clock_ct = clock_ct; //// 3. 
Reduce //// Description: //// Reduce the result mantissa from the parallel components. //// Actions: //// RES_MAN = (RES_MAN_0 >> 40) + (RES_MAN_1 >> 27) //// + (RES_MAN_2 >> 14) + RES_MAN_3 //// Output state: //// 1: RES_MAN -> RE //// 2: RES_MAN -> RE //// x: ANDEXP -> R2 //// x: RES_EXP -> R3 //// -: zero(RES) -> zero_res //// -: sign(RES) -> sign_res //// // 0: RW >> 40 -> RS (RES_MAN_0 = RES_MAN_0 >> 40) // 1: RW >> 27 -> OUT (RES_MAN_1 = RES_MAN_1 >> 27) // 2: RW >> 14 -> OUT (RES_MAN_2 = RES_MAN_2 >> 14) // 3: RW -> RN (RES_MAN_3) instr_0 = { NMEM, RW, `SH 40, SRC, RS, SRC, NCAR, NDIS }; instr_1 = { NMEM, RW, `SH 27, SRC, R1, SRC, NCAR, NDIS }; instr_2 = { NMEM, RW, `SH 14, SRC, R1, SRC, NCAR, NDIS }; instr_3 = { NMEM, RW, NSH, SRC, RN, SRC, NCAR, NDIS }; clockem; // Note: only use PE_1 and PE_2 from here on. // x: OUT -> IN1 (RES_MAN_x) // 0: NOOP // 1: RN -> OUT (RES_MAN_0) // 2: RS -> OUT (RES_MAN_3) // 3: NOOP instr_0 = { NOOP }; instr_1 = { OIN1, RN, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_2 = { OIN1, RS, NSH, ZEROS, R1, SRC, NCAR, NDIS }; instr_3 = { NOOP }; clockem; // 1: OUT -> IN2 (RES_MAN_0) // 2: OUT -> IN2 (RES_MAN_3) instr_1 = { OIN2, NALU, NCAR, NDIS }; instr_2 = instr_1; clockem; // 1: carry_word( IN1 (RES_MAN_1) + IN2 (RES_MAN_0) ) -> RR // 2: carry_word( IN1 (RES_MAN_2) + IN2 (RE