Compare commits

...

3 Commits

Author SHA1 Message Date
eac0c922c9 Add VCS version number 2025-01-10 11:01:43 +03:00
2821d98c6b Add -O5 option to QuestaSim. Rerun benchmarks. 2025-01-09 12:54:21 +03:00
Nikolay Puzanov
c277e3482a Add old (without --timing) verilator test 2024-10-02 17:25:04 +03:00
26 changed files with 147 additions and 60 deletions

View File

@@ -40,35 +40,40 @@
## Результаты для 1024 процессоров
- Xeon E5-2630v3 @ 2.40GHz
- Verilator 5.011 devel rev v5.010-98-g15f8ebc56
- 2 x Xeon E5-2630v3 @ 2.40GHz (no HT), 64GB RAM
- NixOS 24.11 Linux Kernel 6.6.67
- GCC 13.3.0
- Verilator 5.028 2024-08-21 rev v5.028
- Icarus Verilog 13.0 (devel) (s20221226-127-gdeeac2edf)
- ModelSim SE-64 2020.4 (Revision: 2020.10)
- QuestaSim 64 2021.1 (Revision: 2021.1)
- Vivado 2021.1
- [OSS CVC](https://github.com/cambridgehackers/open-src-cvc) (rev. 782c69a)
- VCS T-2022.06
Время выполнения бенчмарка на блоке 1кБ (чч:мм:сс):
```
| Симулятор | Build | Run |
+-----------------------+----------+----------+
| CVC | 00:02:22 | 00:51:47 |
| Icarus Verilog | 00:00:27 | 19:04:37 |
| ModelSim | 00:00:00 | 01:33:14 |
| QuestaSim | 00:00:00 | 01:29:38 |
| VCS | TBD | |
| Verilator (1 thread) | 00:12:03 | 00:02:57 |
| Verilator (8 threads) | 00:18:45 | 00:01:33 |
| XSIM | 00:00:29 | 02:08:54 |
| CVC | 00:00:05 | 00:57:15 |
| Icarus Verilog | 00:00:23 | 16:15:02 |
| QuestaSim (+acc) | 00:00:00 | 01:06:54 |
| QuestaSim (-O5) | 00:00:00 | 00:06:50 |
| VCS | 00:00:25 | 00:04:12 |
| Verilator (1 thread) | 00:09:23 | 00:02:45 |
| Verilator (8 threads) | 00:09:02 | 00:00:50 |
| XSIM | 00:00:29 | 02:06:16 |
| Xcelium | TBD | |
```
Удалось протестировать Xcelium и VCS на другом оборудовании и привести время
выполнения бенчмарка к остальным симам. Время сборки на этих симуляторах примерно
соответствует времени сборки на XSIM (Xcelium ближе к Modelsim).
Удалось протестировать Xcelium на другом оборудовании и привести время выполнения
бенчмарка к остальным симам. Время сборки на этих симуляторах примерно соответствует
времени сборки на XSIM.
В таблице ниже показано относительное время выполнения теста, приведенное к времени
выполнения на многопоточном Вериляторе.
выполнения на многопоточном Вериляторе. Вериляторы 5.028 и 4.120 показали практически
одинаковую скорость, разность в пределах погрешности. Но в 5.028 была включена опция
`--timing`, а клок формировался в верилоге.
"По просьбе выживших, имена были изменены. Из уважения к погибшим, остальное было
рассказано в точности так, как это произошло."
@@ -76,13 +81,13 @@
```
| Симулятор | Run |
+-----------------------+------+
| CVC | 33 |
| Icarus Verilog | 738 |
| ModelSim | 60 |
| QuestaSim | 58 |
| VCS | 3.8 |
| Verilator (1 thread) | 1.9 |
| CVC | 69 |
| Icarus Verilog | 1170 |
| QuestaSim (+acc) | 80 |
| QuestaSim (-O5) | 8.2 |
| VCS | 5.0 |
| Verilator (1 thread) | 3.3 |
| Verilator (8 threads) | 1 |
| XSIM | 83 |
| Xcelium | 4 |
| XSIM | 152 |
| Xcelium | ~4 |
```

View File

@@ -216,13 +216,11 @@ module md5calculator
);
// Print console output
initial
forever begin
@(posedge clock);
if (!reset && console_send) begin
$write("%c", o_console_data);
$fflush;
end
end
always @(posedge clock) begin
if (!reset && console_send) begin
$write("%c", o_console_data);
$fflush;
end
end
endmodule // testbench

View File

@@ -9,8 +9,11 @@ module testbench #(parameter CPU_COUNT = 1024)
logic [31:0] data_len;
logic [CPU_COUNT-1:0] done_all;
int cycle = 0;
always @(posedge clock) cycle <= cycle + 1;
for (genvar ncpu = 0; ncpu < CPU_COUNT; ncpu = ncpu + 1) begin : cpus
logic done;
logic done, done_ack = 1'b0;
logic reset;
logic [127:0] md5;
@@ -30,27 +33,27 @@ module testbench #(parameter CPU_COUNT = 1024)
if(!$value$plusargs("dlen=%d", data_len))
data_len = DATA_LEN;
initial begin
reset = 1'b1;
repeat($urandom % 5 + 2) @(posedge clock);
reset = 1'b0;
@(posedge clock);
int reset_duration;
initial reset_duration = $urandom % CPU_COUNT + 2;
assign reset = cycle <= reset_duration;
while(!done) @(posedge clock);
$display("MD5(0x%x) = %x", ncpu, md5);
always @(posedge clock) begin
if (cycle > reset_duration && done && !done_ack) begin
done_ack <= 1'b1;
$display("MD5(0x%x) = %x", ncpu, md5);
end
end
end
// Wait for complete
initial begin
$display("--- BENCH BEGIN ---");
repeat(5) @(posedge clock);
while ((&done_all) == 1'b0) @(posedge clock);
@(posedge clock);
$display("--- BENCH DONE ---");
$finish;
always @(posedge clock) begin
if (cycle == 0) $display("--- BENCH BEGIN ---");
else if (cycle > 5) begin
if (&done_all) begin
$display("--- BENCH DONE ---");
$finish;
end
end
end
endmodule // testbench

View File

@@ -20,6 +20,6 @@ sed -i -e "s/CPU_COUNT = 1024/CPU_COUNT = $CPU_COUNT/" top-mod.sv
sources=$(cat $FFILE | grep -v "testbench.sv\|picorv32_tcm.sv")
sv2v --top=top -w simbench-all.v top-mod.sv testbench.sv picorv32_tcm.sv $sources
sed -i '1i `timescale 1ps/1ps' simbench-all.v
patch simbench-all.v simbench-all.patch
cvc64 -o top -O -pipe +large +nospecify simbench-all.v

View File

@@ -0,0 +1,20 @@
--- simbench-all.v 2025-01-08 22:09:06.737402979 +0300
+++ simbench-all.v1 2025-01-08 22:08:27.142909974 +0300
@@ -1,3 +1,4 @@
+`timescale 1ps/1ps
module top;
parameter CPU_COUNT = 1024;
reg clock = 1'b0;
@@ -13,10 +14,9 @@
wire [CPU_COUNT - 1:0] done_all;
reg signed [31:0] cycle = 0;
always @(posedge clock) cycle <= cycle + 1;
- genvar _gv_ncpu_1;
+ genvar ncpu;
generate
- for (_gv_ncpu_1 = 0; _gv_ncpu_1 < CPU_COUNT; _gv_ncpu_1 = _gv_ncpu_1 + 1) begin : cpus
- localparam ncpu = _gv_ncpu_1;
+ for (ncpu = 0; ncpu < CPU_COUNT; ncpu = ncpu + 1) begin : cpus
wire done;
reg done_ack = 1'b0;
wire reset;

View File

@@ -1,13 +1,13 @@
diff --git a/source/testbench.sv b/source/testbench.sv
index 1872eed..6f27f84 100644
index 2949591..084d7a3 100644
--- a/source/testbench.sv
+++ b/source/testbench.sv
@@ -32,7 +32,7 @@ module testbench #(parameter CPU_COUNT = 1024)
@@ -34,7 +34,7 @@ module testbench #(parameter CPU_COUNT = 1024)
data_len = DATA_LEN;
initial begin
reset = 1'b1;
- repeat($urandom % 5 + 2) @(posedge clock);
+ repeat($unsigned($random) % 5 + 2) @(posedge clock);
reset = 1'b0;
@(posedge clock);
int reset_duration;
- initial reset_duration = $urandom % CPU_COUNT + 2;
+ initial reset_duration = $unsigned($random) % CPU_COUNT + 2;
assign reset = cycle <= reset_duration;
always @(posedge clock) begin

View File

@@ -0,0 +1 @@
((verilog-mode . ((flycheck-verilator-include-path . ("../source")))))

2
test-modelsim-O5/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
testbench
transcript

7
test-modelsim-O5/__build.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Build step: compile top.sv plus the sources listed in $FFILE with vlog into
# a fresh work library named `testbench`.
# Abort on the first failing command so a broken compile is not silently ignored.
set -e
# NOTE(review): $param and $FFILE are presumably defined by sim_vars.sh — confirm.
. ../scripts/sim_vars.sh
# Remove the previous work library so every build starts from scratch.
rm -rf testbench
# $param is intentionally left unquoted here (it may expand to several options
# or to nothing) — TODO confirm against sim_vars.sh.
vlog -sv -work testbench -vopt $param -f $FFILE top.sv

5
test-modelsim-O5/__run.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Run step: simulate module `top` from the `testbench` library in batch mode
# and run until the simulation ends ("run -all").
# NOTE(review): BLOCK_SIZE and CPU_COUNT are presumably defined by sim_vars.sh — confirm.
. ../scripts/sim_vars.sh
# -voptargs=-O5    : hands the -O5 option to the optimizer (this is the "-O5" benchmark variant).
# +dlen=...        : plusarg carrying the data block length for the testbench.
# -GCPU_COUNT=...  : overrides the CPU_COUNT parameter of `top`.
vsim -batch -voptargs=-O5 -do "run -all" -quiet +dlen=$BLOCK_SIZE -GCPU_COUNT=$CPU_COUNT -lib testbench top

7
test-modelsim-O5/top.sv Normal file
View File

@@ -0,0 +1,7 @@
`timescale 1ps/1ps
// Simulation top level: produces a free-running clock with a 10 ns period
// and feeds it to the benchmark testbench.
module top #(parameter CPU_COUNT = 1024);
  // Clock starts low; it is toggled every half period below.
  logic clock = 1'b0;

  initial begin
    forever #(10ns/2) clock = ~clock;
  end

  // The port is bound positionally because the testbench port list is
  // declared in another file; the parameter is bound by name.
  testbench #(.CPU_COUNT(CPU_COUNT)) testbench (clock);
endmodule

View File

@@ -2,4 +2,4 @@
. ../scripts/sim_vars.sh
vsim -batch -voptargs=+acc=npr -do "run -all" -quiet +dlen=$BLOCK_SIZE -GCPU_COUNT=$CPU_COUNT -lib testbench top
vsim -batch -voptargs="+acc" -do "run -all" -quiet +dlen=$BLOCK_SIZE -GCPU_COUNT=$CPU_COUNT -lib testbench top

View File

@@ -7,8 +7,8 @@ PARAMS :=
THREADS := 1
FLAGS = -Wno-WIDTH -cc --top-module $(TOP_MODULE) +1800-2017ext+sv \
--timing --Mdir $(TOP_MODULE) -o $(TOP_MODULE) -f $(FLAGS_FILE) \
$(PARAMS) --timescale "1ps/1ps" --threads $(THREADS) -j 0
--Mdir $(TOP_MODULE) -o $(TOP_MODULE) -f $(FLAGS_FILE) \
$(PARAMS) --timescale "1ps/1ps" --threads $(THREADS) -j 16
# FLAGS += --trace

1
test-verilator5/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
top

19
test-verilator5/Makefile Normal file
View File

@@ -0,0 +1,19 @@
# Verilator 5 build: compiles $(SOURCES) together with the files listed in
# $(FLAGS_FILE) directly into an executable (--binary). Both the generated
# build directory (--Mdir) and the executable (-o) are named $(TOP_MODULE),
# so the result ends up at $(TOP_MODULE)/$(TOP_MODULE).
TOP_MODULE = top
SOURCES = top.sv
FLAGS_FILE = ../source/sources.f
INCLUDES =

# Meant to be overridden on the make command line,
# e.g. PARAMS="-GCPU_COUNT=16" THREADS=8.
PARAMS :=
THREADS := 1

FLAGS = -Wno-WIDTH --top-module $(TOP_MODULE) +1800-2017ext+sv \
        --timing --Mdir $(TOP_MODULE) -o $(TOP_MODULE) -f $(FLAGS_FILE) \
        $(PARAMS) --timescale "1ps/1ps" --threads $(THREADS) -j 0
# FLAGS += --trace

# Neither target creates a file with its own name. Declaring them phony keeps
# a stray file or directory called `all` or `clean` from making make believe
# the target is already up to date.
.PHONY: all clean

all: $(SOURCES)
	verilator $(FLAGS) --binary $(INCLUDES) $(SOURCES)

clean:
	rm -rf $(TOP_MODULE)

7
test-verilator5/__build.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Build step: rebuild the Verilator 5 simulation binary from scratch.
# Abort on the first failing command so a failed build is not silently ignored.
set -e
# NOTE(review): CPU_COUNT and THREADS are presumably defined by sim_vars.sh — confirm.
. ../scripts/sim_vars.sh
# Remove the previous build directory before rebuilding.
make clean
# PARAMS and THREADS override the variables of the same name in the Makefile.
# NOTE(review): OPT_FAST and VM_PARALLEL_BUILDS look intended for the makefile
# that Verilator generates — confirm they actually reach it with --binary.
make OPT_FAST="-Os -march=native" VM_PARALLEL_BUILDS=0 PARAMS="-GCPU_COUNT=$CPU_COUNT" THREADS=$THREADS

5
test-verilator5/__run.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Run step: execute the simulation binary found at ./top/top.
# NOTE(review): BLOCK_SIZE is presumably defined by sim_vars.sh — confirm.
. ../scripts/sim_vars.sh
# +dlen=... is a plusarg carrying the data block length for the testbench.
./top/top +dlen=$BLOCK_SIZE

7
test-verilator5/top.sv Normal file
View File

@@ -0,0 +1,7 @@
`timescale 1ps/1ps
// Simulation top level for the Verilator 5 flow: the clock is generated here
// in SystemVerilog (10 ns period) and passed to the benchmark testbench.
module top #(parameter CPU_COUNT = 2);
  // Clock starts low; it is toggled every half period below.
  logic clock = 1'b0;

  initial begin
    forever #(10ns/2) clock = ~clock;
  end

  // The port is bound positionally because the testbench port list is
  // declared in another file; the parameter is bound by name.
  testbench #(.CPU_COUNT(CPU_COUNT)) testbench (clock);
endmodule