0% found this document useful (0 votes)
105 views7 pages

Systolic Array Verilog Module Design

The document describes a Verilog implementation of a systolic array, which includes multiple processing elements for matrix multiplication using Vedic multiplication. It defines the architecture, input/output ports, and the necessary modules for arithmetic operations, including adders and multipliers. Additionally, a testbench is provided to simulate the functionality of the systolic array.

Uploaded by

charanyajessie09
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
105 views7 pages

Systolic Array Verilog Module Design

The document describes a Verilog implementation of a systolic array, which includes multiple processing elements for matrix multiplication using Vedic multiplication. It defines the architecture, input/output ports, and the necessary modules for arithmetic operations, including adders and multipliers. Additionally, a testbench is provided to simulate the functionality of the systolic array.

Uploaded by

charanyajessie09
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd

`timescale 1ns / 1ps

// 4x4 systolic array built from sixteen PROCESSING_ELEMENT instances (P0..P15),
// numbered row-major: row r holds P(4r) .. P(4r+3).
//
// Data movement, as wired below:
//   * "a" operands enter at the left of each row (ina_0/4/8/12) and are passed
//     from each PE's outa to the ina of the PE on its right; the right-most
//     PE's outa leaves the array (outa_3/7/11/15).
//   * "b" operands enter at the top of each column (inb_0..inb_3) and are
//     passed from each PE's outb to the inb of the PE below it; the bottom
//     row's outb leaves the array (outb_12..outb_15).
//   * Accumulator values are chained along each row: ARin_0/4/8/12 feed the
//     left-most PE, each PE's ARout feeds the ARin of the PE on its right, and
//     the right-most ARout leaves the array (ARout_3/7/11/15).
//   * clk, reset and sel are broadcast to every PE.
module systolic_array(ina_0, ina_4, ina_8, ina_12,
                      inb_0, inb_1, inb_2, inb_3,
                      outa_3, outa_7, outa_11, outa_15,
                      outb_12, outb_13, outb_14, outb_15,
                      clk, reset, sel,
                      ARin_0, ARin_4, ARin_8, ARin_12,
                      ARout_3, ARout_7, ARout_11, ARout_15);

    input  [7:0]  ina_0, ina_4, ina_8, ina_12,
                  inb_0, inb_1, inb_2, inb_3;
    output [7:0]  outa_3, outa_7, outa_11, outa_15,
                  outb_12, outb_13, outb_14, outb_15;

    input         clk, reset;
    input         sel;
    input  [17:0] ARin_0, ARin_4, ARin_8, ARin_12;
    output [17:0] ARout_3, ARout_7, ARout_11, ARout_15;

    // Internal row-wise "a" links between horizontally adjacent PEs.
    wire [7:0] outa_0, outa_1, outa_2, outa_4, outa_5, outa_6, outa_8, outa_9,
               outa_10, outa_12, outa_13, outa_14;
    // Internal column-wise "b" links between vertically adjacent PEs.
    wire [7:0] outb_0, outb_1, outb_2, outb_3, outb_4, outb_5, outb_6, outb_7,
               outb_8, outb_9, outb_10, outb_11;
    // Internal accumulator links between horizontally adjacent PEs.
    wire [17:0] ARout_0, ARout_1, ARout_2, ARout_4, ARout_5, ARout_6, ARout_8,
                ARout_9, ARout_10, ARout_12, ARout_13, ARout_14;

    // ---- Row 0 ----
    PROCESSING_ELEMENT P0
        (.ina(ina_0), .inb(inb_0), .clk(clk), .reset(reset),
         .outa(outa_0), .outb(outb_0), .ARin(ARin_0), .ARout(ARout_0), .sel(sel));
    PROCESSING_ELEMENT P1
        (.ina(outa_0), .inb(inb_1), .clk(clk), .reset(reset),
         .outa(outa_1), .outb(outb_1), .ARin(ARout_0), .ARout(ARout_1), .sel(sel));
    PROCESSING_ELEMENT P2
        (.ina(outa_1), .inb(inb_2), .clk(clk), .reset(reset),
         .outa(outa_2), .outb(outb_2), .ARin(ARout_1), .ARout(ARout_2), .sel(sel));
    PROCESSING_ELEMENT P3
        (.ina(outa_2), .inb(inb_3), .clk(clk), .reset(reset),
         .outa(outa_3), .outb(outb_3), .ARin(ARout_2), .ARout(ARout_3), .sel(sel));

    // ---- Row 1 ----
    PROCESSING_ELEMENT P4
        (.ina(ina_4), .inb(outb_0), .clk(clk), .reset(reset),
         .outa(outa_4), .outb(outb_4), .ARin(ARin_4), .ARout(ARout_4), .sel(sel));
    PROCESSING_ELEMENT P5
        (.ina(outa_4), .inb(outb_1), .clk(clk), .reset(reset),
         .outa(outa_5), .outb(outb_5), .ARin(ARout_4), .ARout(ARout_5), .sel(sel));
    PROCESSING_ELEMENT P6
        (.ina(outa_5), .inb(outb_2), .clk(clk), .reset(reset),
         .outa(outa_6), .outb(outb_6), .ARin(ARout_5), .ARout(ARout_6), .sel(sel));
    PROCESSING_ELEMENT P7
        (.ina(outa_6), .inb(outb_3), .clk(clk), .reset(reset),
         .outa(outa_7), .outb(outb_7), .ARin(ARout_6), .ARout(ARout_7), .sel(sel));

    // ---- Row 2 ----
    PROCESSING_ELEMENT P8
        (.ina(ina_8), .inb(outb_4), .clk(clk), .reset(reset),
         .outa(outa_8), .outb(outb_8), .ARin(ARin_8), .ARout(ARout_8), .sel(sel));
    PROCESSING_ELEMENT P9
        (.ina(outa_8), .inb(outb_5), .clk(clk), .reset(reset),
         .outa(outa_9), .outb(outb_9), .ARin(ARout_8), .ARout(ARout_9), .sel(sel));
    PROCESSING_ELEMENT P10
        (.ina(outa_9), .inb(outb_6), .clk(clk), .reset(reset),
         .outa(outa_10), .outb(outb_10), .ARin(ARout_9), .ARout(ARout_10), .sel(sel));
    PROCESSING_ELEMENT P11
        (.ina(outa_10), .inb(outb_7), .clk(clk), .reset(reset),
         .outa(outa_11), .outb(outb_11), .ARin(ARout_10), .ARout(ARout_11), .sel(sel));

    // ---- Row 3 ----
    PROCESSING_ELEMENT P12
        (.ina(ina_12), .inb(outb_8), .clk(clk), .reset(reset),
         .outa(outa_12), .outb(outb_12), .ARin(ARin_12), .ARout(ARout_12), .sel(sel));
    PROCESSING_ELEMENT P13
        (.ina(outa_12), .inb(outb_9), .clk(clk), .reset(reset),
         .outa(outa_13), .outb(outb_13), .ARin(ARout_12), .ARout(ARout_13), .sel(sel));
    PROCESSING_ELEMENT P14
        (.ina(outa_13), .inb(outb_10), .clk(clk), .reset(reset),
         .outa(outa_14), .outb(outb_14), .ARin(ARout_13), .ARout(ARout_14), .sel(sel));
    PROCESSING_ELEMENT P15
        (.ina(outa_14), .inb(outb_11), .clk(clk), .reset(reset),
         .outa(outa_15), .outb(outb_15), .ARin(ARout_14), .ARout(ARout_15), .sel(sel));

endmodule

`timescale 1ns / 1ps


// One cell of the systolic array.
//
// Ports:
//   clk, reset : clock and asynchronous, active-high reset.
//   ina, inb   : 8-bit operands; registered and forwarded on outa / outb one
//                clock later.
//   ARin       : 18-bit accumulator value arriving from the previous cell.
//   sel        : 1 -> accumulate (ar_reg <= ar_reg + ina*inb),
//                0 -> load ARin into the accumulator register.
//   ARout      : current value of the accumulator register.
module PROCESSING_ELEMENT (clk,reset,ina,inb,outa,outb,ARin,sel,ARout);

    input clk;
    input reset;
    input [7:0] ina;
    input [7:0] inb;
    output reg [7:0] outa;
    output reg [7:0] outb;
    input [17:0] ARin;
    input sel;
    output wire [17:0] ARout;

    wire [15:0] mult_out;   // ina * inb from the Vedic multiplier

    reg  [17:0] ar_reg;     // accumulator register
    wire [17:0] ar_x;       // ar_reg + zero-extended mult_out
    wire [17:0] mux_out;    // next value of ar_reg

    // Accumulate: ar_x = ar_reg + mult_out (18-bit, wraps on overflow).
    adder_18bit adder (
        .ar_reg(ar_reg),
        .mult_out(mult_out),
        .ar_x(ar_x)
    );

    // Next-state mux: sel high selects the adder output (accumulate),
    // sel low selects ARin (load the value from the previous cell).
    assign mux_out = sel ? ar_x : ARin;

    assign ARout = ar_reg;

    vedic_8x8 px1 (.a(ina),.b(inb),.mult(mult_out));

    // Registers. The reset edge in the sensitivity list must match the
    // polarity tested in the body: reset is checked active-high, so the
    // asynchronous event is posedge reset. (Using negedge here would make
    // reset release behave like an extra clock edge.)
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            ar_reg <= 0;
            outa <= 0;
            outb <= 0;
        end else begin
            ar_reg <= mux_out;
            outa <= ina;
            outb <= inb;
        end
    end

endmodule

// Adder module
// 18-bit accumulator adder: ar_x = ar_reg + mult_out, with the 16-bit
// mult_out zero-extended to 18 bits. The result is truncated to 18 bits,
// so any carry out of bit 17 is discarded.
module adder_18bit(
    input  [17:0] ar_reg,
    input  [15:0] mult_out,
    output [17:0] ar_x
);

    // Zero-pad the product up to the accumulator width.
    wire [17:0] product_18b;
    assign product_18b = {2'b00, mult_out};

    assign ar_x = product_18b + ar_reg;

endmodule

//multiplier
`timescale 1ns / 1ps

// 8x8 unsigned multiplier assembled from four 4x4 partial products:
//   mult = (aH*bH << 8) + ((aH*bL + aL*bH) << 4) + aL*bL
// where aH/aL and bH/bL are the upper/lower nibbles of a and b.
module vedic_8x8(
    input  [7:0]  a,
    input  [7:0]  b,
    output [15:0] mult
);

    wire [7:0] pp_ll;      // aL * bL
    wire [7:0] pp_hl;      // aH * bL
    wire [7:0] pp_lh;      // aL * bH
    wire [7:0] pp_hh;      // aH * bH
    wire [9:0] cross_sum;  // pp_hl + pp_lh
    wire [9:0] mid_sum;    // cross_sum + upper nibble of pp_ll
    wire [7:0] top_sum;    // pp_hh + carry-over from mid_sum

    // Four nibble-wise partial products.
    vedic4x4 M1(.a(a[3:0]), .b(b[3:0]), .mult(pp_ll));
    vedic4x4 M2(.a(a[7:4]), .b(b[3:0]), .mult(pp_hl));
    vedic4x4 M3(.a(a[3:0]), .b(b[7:4]), .mult(pp_lh));
    vedic4x4 M4(.a(a[7:4]), .b(b[7:4]), .mult(pp_hh));

    // Middle column: both cross products plus what spills out of pp_ll.
    adder10 A1(.a({2'b00, pp_hl}), .b({2'b00, pp_lh}), .sum(cross_sum));
    adder10 A2(.a(cross_sum), .b({6'b000000, pp_ll[7:4]}), .sum(mid_sum));

    // Top byte: high product plus what spills out of the middle column.
    adder8 A3(.a(pp_hh), .b({2'b00, mid_sum[9:4]}), .sum(top_sum));

    // Assemble the 16-bit result.
    assign mult[3:0]  = pp_ll[3:0];
    assign mult[7:4]  = mid_sum[3:0];
    assign mult[15:8] = top_sum;

endmodule

// 4x4 unsigned multiplier assembled from four 2x2 partial products:
//   mult = (aH*bH << 4) + ((aH*bL + aL*bH) << 2) + aL*bL
// where aH/aL and bH/bL are the upper/lower bit pairs of a and b.
module vedic4x4(
    input  [3:0] a,
    input  [3:0] b,
    output [7:0] mult
);

    wire [3:0] pp_ll;      // aL * bL
    wire [3:0] pp_hl;      // aH * bL
    wire [3:0] pp_lh;      // aL * bH
    wire [3:0] pp_hh;      // aH * bH
    wire [5:0] ll_spill;   // upper two bits of pp_ll, zero-extended
    wire [5:0] cross_sum;  // pp_lh + pp_hl
    wire [5:0] mid_sum;    // cross_sum + ll_spill
    wire [3:0] top_sum;    // pp_hh + carry-over from mid_sum

    // Four 2-bit partial products.
    vedic_2x2 V1(.a(a[1:0]), .b(b[1:0]), .mult(pp_ll));
    vedic_2x2 V2(.a(a[3:2]), .b(b[1:0]), .mult(pp_hl));
    vedic_2x2 V3(.a(a[1:0]), .b(b[3:2]), .mult(pp_lh));
    vedic_2x2 V4(.a(a[3:2]), .b(b[3:2]), .mult(pp_hh));

    assign ll_spill = {4'b0000, pp_ll[3:2]};

    // Middle column: both cross products plus what spills out of pp_ll.
    adder6 A1(.a({2'b00, pp_lh}), .b({2'b00, pp_hl}), .sum(cross_sum));
    adder6 A2(.a(cross_sum), .b(ll_spill), .sum(mid_sum));

    // Top nibble: high product plus what spills out of the middle column.
    adder4 A3(.a(pp_hh), .b(mid_sum[5:2]), .sum(top_sum));

    // Assemble the 8-bit result.
    assign mult[1:0] = pp_ll[1:0];
    assign mult[3:2] = mid_sum[1:0];
    assign mult[7:4] = top_sum;

endmodule

// 2x2 unsigned multiplier: four AND-gate partial products combined with two
// half adders to produce the 4-bit product.
module vedic_2x2 (
    input  [1:0] a,
    input  [1:0] b,
    output [3:0] mult
);

    wire and_a1b0;   // a[1] & b[0]
    wire and_a0b1;   // a[0] & b[1]
    wire and_a1b1;   // a[1] & b[1]
    wire carry_mid;  // carry out of the bit-1 column

    assign and_a1b0 = a[1] & b[0];
    assign and_a0b1 = a[0] & b[1];
    assign and_a1b1 = a[1] & b[1];

    // Bit 0 needs no addition.
    assign mult[0] = a[0] & b[0];

    // Bit 1: sum of the two cross terms.
    halfAdder H0(.a(and_a1b0), .b(and_a0b1), .sum(mult[1]), .carry(carry_mid));

    // Bits 2 and 3: high term plus the carry from bit 1.
    halfAdder H1(.a(and_a1b1), .b(carry_mid), .sum(mult[2]), .carry(mult[3]));

endmodule

// Single-bit half adder: sum is the XOR of the inputs, carry is their AND.
module halfAdder(
    input  a,
    input  b,
    output sum,
    output carry
);

    assign carry = a & b;
    assign sum   = a ^ b;

endmodule

// 4-bit adder without carry out: sum = (a + b) truncated to 4 bits.
module adder4(
    input  [3:0] a,
    input  [3:0] b,
    output wire [3:0] sum
);

    assign sum = a + b;

endmodule

// 6-bit adder without carry out: sum = (a + b) truncated to 6 bits.
module adder6(
    input  [5:0] a,
    input  [5:0] b,
    output wire [5:0] sum
);

    assign sum = a + b;

endmodule

// 8-bit adder without carry out: sum = (a + b) truncated to 8 bits.
module adder8(
    input  [7:0] a,
    input  [7:0] b,
    output wire [7:0] sum
);

    assign sum = a + b;

endmodule

// 10-bit adder without carry out: sum = (a + b) truncated to 10 bits.
module adder10(
    input  [9:0] a,
    input  [9:0] b,
    output wire [9:0] sum
);

    assign sum = a + b;

endmodule

// testbench

`timescale 1ns / 1ps

// Testbench for systolic_array: pulses reset, applies two sets of operand /
// accumulator stimulus 20 ns apart with sel = 1, lets the array run for a
// further 100 ns, prints the edge outputs, and ends the simulation.
module systolic_array_tb;

    // Inputs
    reg [7:0] ina_0, ina_4, ina_8, ina_12;
    reg [7:0] inb_0, inb_1, inb_2, inb_3;
    reg clk, reset, sel;
    reg [17:0] ARin_0, ARin_4, ARin_8, ARin_12;

    // Outputs
    wire [7:0] outa_3, outa_7, outa_11, outa_15;
    wire [7:0] outb_12, outb_13, outb_14, outb_15;
    wire [17:0] ARout_3, ARout_7, ARout_11, ARout_15;

    // Instantiate the Unit Under Test (UUT)
    systolic_array uut (
        .ina_0(ina_0), .ina_4(ina_4), .ina_8(ina_8), .ina_12(ina_12),
        .inb_0(inb_0), .inb_1(inb_1), .inb_2(inb_2), .inb_3(inb_3),
        .outa_3(outa_3), .outa_7(outa_7), .outa_11(outa_11), .outa_15(outa_15),
        .outb_12(outb_12), .outb_13(outb_13), .outb_14(outb_14), .outb_15(outb_15),
        .clk(clk), .reset(reset), .sel(sel),
        .ARin_0(ARin_0), .ARin_4(ARin_4), .ARin_8(ARin_8), .ARin_12(ARin_12),
        .ARout_3(ARout_3), .ARout_7(ARout_7), .ARout_11(ARout_11), .ARout_15(ARout_15)
    );

    initial begin
        // Initialize Inputs
        ina_0 = 0; ina_4 = 0; ina_8 = 0; ina_12 = 0;
        inb_0 = 0; inb_1 = 0; inb_2 = 0; inb_3 = 0;
        clk = 0; reset = 0; sel = 0;
        ARin_0 = 0; ARin_4 = 0; ARin_8 = 0; ARin_12 = 0;

        // Idle for 100 ns before applying reset
        #100;

        // Pulse the active-high reset for 10 ns, then enable accumulation
        reset = 1;
        #10;
        reset = 0;
        sel = 1;

        // Test case 1
        ina_0 = 8'h01; ina_4 = 8'h02; ina_8 = 8'h03; ina_12 = 8'h04;
        inb_0 = 8'h05; inb_1 = 8'h06; inb_2 = 8'h07; inb_3 = 8'h08;
        ARin_0 = 18'h00001; ARin_4 = 18'h00002; ARin_8 = 18'h00003;
        ARin_12 = 18'h00004;
        #20;

        // Test case 2
        ina_0 = 8'h09; ina_4 = 8'h0A; ina_8 = 8'h0B; ina_12 = 8'h0C;
        inb_0 = 8'h0D; inb_1 = 8'h0E; inb_2 = 8'h0F; inb_3 = 8'h10;
        ARin_0 = 18'h00005; ARin_4 = 18'h00006; ARin_8 = 18'h00007;
        ARin_12 = 18'h00008;
        #20;

        // Add more test cases as needed

        // Give the stimulus time to propagate through the array, report the
        // edge outputs, and stop. Without $finish the free-running clock
        // below would keep the simulation alive indefinitely.
        #100;
        $display("outa : %h %h %h %h", outa_3, outa_7, outa_11, outa_15);
        $display("outb : %h %h %h %h", outb_12, outb_13, outb_14, outb_15);
        $display("ARout: %h %h %h %h", ARout_3, ARout_7, ARout_11, ARout_15);
        $finish;
    end

    always #5 clk = ~clk; // Clock generation: 10 ns period

endmodule

You might also like