From 07cac5de1ba643ec31eadccb31a0b659060af019 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Wed, 21 Aug 2024 04:43:33 +0000 Subject: [PATCH 1/7] Commands for ORNL tutorial for Matt S parts --- materials/archive/isca24/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 materials/archive/isca24/README.md diff --git a/materials/archive/isca24/README.md b/materials/archive/isca24/README.md new file mode 100644 index 0000000..88f3d64 --- /dev/null +++ b/materials/archive/isca24/README.md @@ -0,0 +1,29 @@ +# stdlib / getting started commands +```sh +gem5-mesi materials/archive/isca24/01-basic.py +``` + +```sh +gem5 materials/archive/isca24/02-components.py +``` + +```sh +gem5 materials/archive/isca24/03-processor.py +``` + +# Sim Objects commands +```sh +scons build/NULL/gem5.opt -j$(nproc) +``` + +```sh +cp -r materials/03-developing-gem5-models/02-debugging-gem5/step-1/bootcamp gem5/src/ +``` + +```sh +build/NULL/gem5.opt src/bootcamp/hello-sim-object/run_hello.py +``` + +```sh +build/NULL/gem5.opt --debug-flags=HelloExampleFlag src/bootcamp/hello-sim-object/run_hello.py +``` From 34a19a3c7e86b3c9a67f744452c1db018ec0b3d6 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Wed, 21 Aug 2024 20:39:41 +0000 Subject: [PATCH 2/7] Fix typo in GPU MFMA example --- materials/04-GPU-model/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/materials/04-GPU-model/README.md b/materials/04-GPU-model/README.md index 9db9e77..bcff9df 100644 --- a/materials/04-GPU-model/README.md +++ b/materials/04-GPU-model/README.md @@ -34,7 +34,7 @@ cd /workspaces/2024 # MFMA example ```sh cd /workspaces/2024/ -cp –r materials/04-GPU-model/mfma_fp32/ gem5-resources/src/gpu/mfma_fp32 +cp -r materials/04-GPU-model/mfma_fp32/ gem5-resources/src/gpu/mfma_fp32 cd gem5-resources/src/gpu/mfma_fp32 docker run --rm -v /workspaces/2024:/workspaces/2024 -w ${PWD} ghcr.io/gem5/gpu-fs:latest make cd /workspaces/2024/ From 
ceea2d5cc6f6b27a8c2ccbdfbf55bd14352aaa22 Mon Sep 17 00:00:00 2001 From: Matt Sinclair Date: Wed, 21 Aug 2024 20:52:22 +0000 Subject: [PATCH 3/7] gpu: add missing output directories for checkpoint from slides to README --- materials/04-GPU-model/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/materials/04-GPU-model/README.md b/materials/04-GPU-model/README.md index bcff9df..f610665 100644 --- a/materials/04-GPU-model/README.md +++ b/materials/04-GPU-model/README.md @@ -63,12 +63,12 @@ docker run --rm -v /workspaces/2024:/workspaces/2024 -w ${PWD} ghcr.io/gem5/gpu- cd /workspaces/2024 ``` ```sh -/usr/local/bin/gem5-vega gem5/configs/example/gpufs/mi200.py --kernel ./vmlinux-gpu-ml-isca --disk-image ./x86-ubuntu-gpu-ml-isca --app ./gem5-resources/src/gpu/square/bin/square --no-kvm-perf --checkpoint-dir ./gpuckpt +/usr/local/bin/gem5-vega -d createckpt gem5/configs/example/gpufs/mi200.py --kernel ./vmlinux-gpu-ml-isca --disk-image ./x86-ubuntu-gpu-ml-isca --app ./gem5-resources/src/gpu/square/bin/square --no-kvm-perf --checkpoint-dir ./gpuckpt ``` # To restore from checkpoint ```sh -/usr/local/bin/gem5-vega gem5/configs/example/gpufs/mi200.py --kernel ./vmlinux-gpu-ml-isca --disk-image ./x86-ubuntu-gpu-ml-isca --app ./gem5-resources/src/gpu/square/bin/square --no-kvm-perf --restore-dir ./gpuckpt +/usr/local/bin/gem5-vega -d restoreckpt gem5/configs/example/gpufs/mi200.py --kernel ./vmlinux-gpu-ml-isca --disk-image ./x86-ubuntu-gpu-ml-isca --app ./gem5-resources/src/gpu/square/bin/square --no-kvm-perf --restore-dir ./gpuckpt ``` # PyTorch example From ec443d73435ac8c6ef821c80273ca226e4000520 Mon Sep 17 00:00:00 2001 From: Noah Krim Date: Wed, 21 Aug 2024 17:26:37 -0700 Subject: [PATCH 4/7] Ensure consistency of slide code with working materials --- slides/03-Developing-gem5-models/04-ports.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/slides/03-Developing-gem5-models/04-ports.md 
b/slides/03-Developing-gem5-models/04-ports.md index 8db2e2d..1a01be3 100644 --- a/slides/03-Developing-gem5-models/04-ports.md +++ b/slides/03-Developing-gem5-models/04-ports.md @@ -1489,7 +1489,7 @@ InspectorGadget::InspectorGadgetStats::InspectorGadgetStats(InspectorGadget* ins Few things to note: -1- Initialize our stat object by adding `stats(this)` to the initialization list in the constructor `InspectorGdaget`. +1- **Important:** initialize our stat object by adding `stats(this)` to the initialization list in the constructor `InspectorGdaget`. 2- `statistics::Group::Group` takes a pointer to an object of `statistics::Group` that will be its parent. Class `SimObject` inherits from `statistics::Group` so we can use a pointer to `InspectorGadget` as that input. 3- The macro `ADD_STAT` registers and initializes our statistics that we have defined under the struct. The order of arguments are `name`, `unit`, `description`. To rid yourself of any headache, make sure the order of `ADD_STAT` macros match that of statistic declaration. @@ -1512,7 +1512,7 @@ InspectorGadget::processNextReqSendEvent() } void -InspectorGadget::processNextReqSendEvent() +InspectorGadget::processNextRespSendEvent() { // ... stats.numResponsesFwded++; @@ -1551,7 +1551,7 @@ InspectorGadget::processNextReqSendEvent() } void -InspectorGadget::processNextReqSendEvent() +InspectorGadget::processNextRespSendEvent() { // ... stats.numResponsesFwded++; @@ -1615,12 +1615,15 @@ Again, we need to model latency. Therefore, we need to declare a new event that --- + + ## Adding Inspection: Header File To declare `nextInspectionEvent`, add the following lines under the `private` scope of `InspectorGadget` in `src/bootcamp/inspector-gadget/inspcetor_gadget.hh`. ```cpp private: + int output_buffer_entries; TimedQueue outputBuffer; EventFunctionWrapper nextInspectionEvent; @@ -1735,7 +1738,7 @@ Now, let's declare a new class that inherits from `Packet::SenderState`. 
Let's d { uint64_t sequenceNumber; SequenceNumberTag(uint64_t sequenceNumber): - SenderState(), (sequenceNumber) + SenderState(), sequenceNumber(sequenceNumber) {} }; ``` @@ -1879,7 +1882,7 @@ InspectorGadget::scheduleNextReqSendEvent(Tick when) bool have_items = !outputBuffer.empty(); if (port_avail && have_items && !nextReqSendEvent.scheduled()) { - Tick schedule_time = align(std::max(when, inspectionBuffer.firstReadyTime())); + Tick schedule_time = align(std::max(when, outputBuffer.firstReadyTime())); schedule(nextReqSendEvent, schedule_time); } } @@ -1925,7 +1928,7 @@ void InspectorGadget::processNextReqSendEvent() { panic_if(memSidePort.blocked(), "Should never try to send if blocked!"); - panic_if(!inspectionBuffer.hasReady(curTick()), "Should never try to send if no ready packets!"); + panic_if(!outputBuffer.hasReady(curTick()), "Should never try to send if no ready packets!"); stats.numRequestsFwded++; PacketPtr pkt = outputBuffer.front(); @@ -2164,7 +2167,7 @@ InspectorGadget::scheduleNextInspectionEvent(Tick when) if (have_packet && have_entry && !nextInspectionEvent.scheduled()) { Tick first_avail_insp_unit_time = \ - std::min_element( + *std::min_element( inspectionUnitAvailableTimes.begin(), inspectionUnitAvailableTimes.end() ); @@ -2207,7 +2210,7 @@ InspectorGadget::processNextInspectionEvent() stats.totalInspectionBufferLatency += curTick() - inspectionBuffer.frontTime(); PacketPtr pkt = inspectionBuffer.front(); inspectRequest(pkt); - outputBuffer.push(pkt, curTick()); + outputBuffer.push(pkt, clockEdge(totalInspectionLatency)); inspectionBuffer.pop(); inspectionUnitAvailableTimes[i] = clockEdge(totalInspectionLatency); insp_window_left--; From 32e8cdd945cd99c66ce9bf1ba49164f6d34a63fa Mon Sep 17 00:00:00 2001 From: Erin Le Date: Fri, 23 Aug 2024 00:33:17 +0000 Subject: [PATCH 5/7] Messed up with stashing/rebasing; committing changes that I think have my typo pass before cherry-picking the commit that does have them --- 
.../02-Using-gem5/01-stdlib/01-02-fs-mode.py | 6 +- slides/01-Introduction/02-getting-started.md | 4 +- .../01-Introduction/03-python-background.md | 4 +- slides/02-Using-gem5/01-stdlib.md | 14 +- slides/02-Using-gem5/03-running-in-gem5.md | 10 +- slides/02-Using-gem5/06-memory.md | 6 +- .../08-accelerating-simulation.md | 16 +- slides/02-Using-gem5/11-multisim.md | 4 +- slides/03-Developing-gem5-models/04-ports.md | 75 ++++----- .../05-modeling-cores.md | 18 +-- .../06-modeling-cache-coherence.md | 147 +++++++++--------- .../07-chi-protocol.md | 26 ++-- .../08-ruby-network.md | 4 +- .../09-extending-gem5-models.md | 119 +++++++------- slides/05-Other-simulators/01-sst.md | 26 ++-- slides/05-Other-simulators/02-dram.md | 24 +-- slides/06-Contributing/03-gem5-at-home.md | 2 +- 17 files changed, 256 insertions(+), 249 deletions(-) diff --git a/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py b/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py index f35754b..9cfd970 100644 --- a/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py +++ b/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py @@ -14,11 +14,11 @@ # Here we set up a MESI Two Level Cache Hierarchy. cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) diff --git a/slides/01-Introduction/02-getting-started.md b/slides/01-Introduction/02-getting-started.md index b474c2e..2dea1b6 100644 --- a/slides/01-Introduction/02-getting-started.md +++ b/slides/01-Introduction/02-getting-started.md @@ -183,9 +183,9 @@ board = X86DemoBoard() The X86DemoBoard has the following properties: -- Single Channel DDR3, 2GB Memory. +- Single Channel DDR3, 2GiB Memory. - A 4 core 3GHz processor (using gem5’s "timing" model). -- A MESI Two Level Cache Hierarchy, with 32kB data and instruction case and a 1MB L2 Cache. 
+- A MESI Two Level Cache Hierarchy, with 32KiB data and instruction case and a 1MiB L2 Cache. - Will be run as a Full-System simulation. Source is available: [src/python/gem5/prebuilt/demo/x86_demo_board.py](../../gem5/src/python/gem5/prebuilt/demo/x86_demo_board.py). diff --git a/slides/01-Introduction/03-python-background.md b/slides/01-Introduction/03-python-background.md index ea17f30..e89a201 100644 --- a/slides/01-Introduction/03-python-background.md +++ b/slides/01-Introduction/03-python-background.md @@ -30,7 +30,7 @@ from m5.objects import CPU, L1Cache cpu = CPU() # Create the CPU SimObject cpu.clock = '1GHz' # Set it's parameters -cpu.l1_cache = L1Cache(size="64kB") # Connect it to other SimObjects. +cpu.l1_cache = L1Cache(size="64KiB") # Connect it to other SimObjects. # ... more configuration ... ``` @@ -1022,7 +1022,7 @@ class L1ICache(L1Cache): def __init__(self): # Set the size - self.size = "32kB" + self.size = "32KiB" super().__init__() # This is the implementation needed for diff --git a/slides/02-Using-gem5/01-stdlib.md b/slides/02-Using-gem5/01-stdlib.md index 4ea7af2..bb05a31 100644 --- a/slides/02-Using-gem5/01-stdlib.md +++ b/slides/02-Using-gem5/01-stdlib.md @@ -92,11 +92,11 @@ from gem5.simulate.simulator import Simulator ```python cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) @@ -481,16 +481,16 @@ This adds a two-level cache hierarchy and a memory system. 
```python cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") ``` --- diff --git a/slides/02-Using-gem5/03-running-in-gem5.md b/slides/02-Using-gem5/03-running-in-gem5.md index 51aa359..6565f0c 100644 --- a/slides/02-Using-gem5/03-running-in-gem5.md +++ b/slides/02-Using-gem5/03-running-in-gem5.md @@ -587,7 +587,7 @@ class LinearGenerator(AbstractGenerator): self, num_cores: int = 1, duration: str = "1ms", - rate: str = "100GB/s", + rate: str = "100GiB/s", block_size: int = 64, min_addr: int = 0, max_addr: int = 32768, @@ -606,7 +606,7 @@ class RandomGenerator(AbstractGenerator): self, num_cores: int = 1, duration: str = "1ms", - rate: str = "100GB/s", + rate: str = "100GiB/s", block_size: int = 64, min_addr: int = 0, max_addr: int = 32768, @@ -689,7 +689,7 @@ Add a traffic generator right below `memory = SingleChannelDDR3_1600()` with the following lines. 
```python -generator = LinearGenerator(num_cores=1, rate="1GB/s") +generator = LinearGenerator(num_cores=1, rate="1GiB/s") ``` ### @@ -719,7 +719,7 @@ cache_hierarchy = MyPrivateL1SharedL2CacheHierarchy() memory = SingleChannelDDR3_1600() -generator = LinearGenerator(num_cores=1, rate="1GB/s") +generator = LinearGenerator(num_cores=1, rate="1GiB/s") motherboard = TestBoard( clk_freq="3GHz", @@ -917,7 +917,7 @@ class HybridGenerator(AbstractGenerator): self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/slides/02-Using-gem5/06-memory.md b/slides/02-Using-gem5/06-memory.md index 2866b8d..2cadaad 100644 --- a/slides/02-Using-gem5/06-memory.md +++ b/slides/02-Using-gem5/06-memory.md @@ -122,7 +122,7 @@ gem5 run-mem.py Results for running with 16 GiB/s, 32 GiB/s, 64 GiB/s, and 100% reads and 50% reads. -| Bandwidth | Read Percentage | Linear Speed (GB/s) | Random Speed (GB/s) | +| Bandwidth | Read Percentage | Linear Speed (GiB/s) | Random Speed (GiB/s) | |-----------|-----------------|---------------------|---------------------| | 16 GiB/s | 100% | 17.180288 | 17.180288 | | | 50% | 17.180288 | 17.180288 | @@ -178,7 +178,7 @@ SingleChannelDDR4_2400() Results for running with 16 GiB/s, 32 GiB/s, and 100% reads and 50% reads. -| Bandwidth | Read Percentage | Linear Speed (GB/s) | Random Speed (GB/s) | +| Bandwidth | Read Percentage | Linear Speed (GiB/s) | Random Speed (GiB/s) | |-----------|-----------------|---------------------|---------------------| | 16 GiB/s | 100% | 13.85856 | 14.557056 | | | 50% | 13.003904 | 13.811776 | @@ -231,7 +231,7 @@ from lpddr2 import SingleChannelLPDDR2 Results for running with 16 GiB/s, and 100% reads and 50% reads. 
-| Bandwidth | Read Percentage | Linear Speed (GB/s) | Random Speed (GB/s) | +| Bandwidth | Read Percentage | Linear Speed (GiB/s) | Random Speed (GiB/s) | |-----------|-----------------|---------------------|---------------------| | 16 GiB/s | 100% | 4.089408 | 4.079552 | | | 50% | 3.65664 | 3.58816 | diff --git a/slides/02-Using-gem5/08-accelerating-simulation.md b/slides/02-Using-gem5/08-accelerating-simulation.md index f05de90..0e510ef 100644 --- a/slides/02-Using-gem5/08-accelerating-simulation.md +++ b/slides/02-Using-gem5/08-accelerating-simulation.md @@ -546,12 +546,12 @@ cache_hierarchy = NoCache() ## 03-checkpoint-and-restore -Next, let's set up a simple single channel memory with 3GB. +Next, let's set up a simple single channel memory with 3GiB. ```python -# Let's set up a SingleChannelDDR4_2400 memory with 3GB size +# Let's set up a SingleChannelDDR4_2400 memory with 3GiB size from gem5.components.memory.single_channel import SingleChannelDDR4_2400 -memory = SingleChannelDDR4_2400(size="3GB") +memory = SingleChannelDDR4_2400(size="3GiB") # ``` @@ -710,10 +710,10 @@ For this example, our cache hierarchies, memory types, and CPU types are differe ```python # restoring script cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="32kB", - l1i_size="32kB" + l1d_size="32KiB", + l1i_size="32KiB" ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.TIMING, isa=ISA.X86, @@ -726,7 +726,7 @@ processor = SimpleProcessor( ```python # checkpointing script cache_hierarchy = NoCache() -memory = SingleChannelDDR4_2400(size="3GB") +memory = SingleChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.KVM, isa=ISA.X86, @@ -738,7 +738,7 @@ processor = SimpleProcessor( ## 03-checkpoint-and-restore -These changes all fall within the limits of the restrictions, but if we change the memory size from `3GB` to `2GB`, we will see the following error. 
+These changes all fall within the limits of the restrictions, but if we change the memory size from `3GiB` to `2GiB`, we will see the following error. ```bash src/mem/physical.cc:462: fatal: Memory range size has changed! Saw 3221225472, expected 2147483648 diff --git a/slides/02-Using-gem5/11-multisim.md b/slides/02-Using-gem5/11-multisim.md index a3de9f1..6e86cd4 100644 --- a/slides/02-Using-gem5/11-multisim.md +++ b/slides/02-Using-gem5/11-multisim.md @@ -110,8 +110,8 @@ Put this line near the top of your configuration script. ## Use simple Python constructs to define multiple simulations ```python -for data_cache_size in ["8kB","16kB"]: - for instruction_cache_size in ["8kB","16kB"]: +for data_cache_size in ["8KiB","16KiB"]: + for instruction_cache_size in ["8KiB","16KiB"]: cache_hierarchy = PrivateL1CacheHierarchy( l1d_size=data_cache_size, l1i_size=instruction_cache_size, diff --git a/slides/03-Developing-gem5-models/04-ports.md b/slides/03-Developing-gem5-models/04-ports.md index 1a01be3..230b36b 100644 --- a/slides/03-Developing-gem5-models/04-ports.md +++ b/slides/03-Developing-gem5-models/04-ports.md @@ -158,7 +158,7 @@ In this step, we will implement our new `SimObject` called `InspectorGadget`. `I - Step 1: We will implement `InspectorGadget` to forward traffic from CPU to memory and back, causing latency for queueing traffic. - Step 2: We will extend `InspectorGadget` to *inspect* the traffic, causing further delay (for `1 cycle`) for inspection. -- Step 3: We will extend `InpsectorGadget` like below: +- Step 3: We will extend `InspectorGadget` like below: - It will do multiple inspection every cycle, resulting in higher traffic throughput. - It will expose `inspection_latency` as a parameter. - Step 4: We will extend `InspectorGadget` to allow for pipelining of the inspections. 
@@ -225,7 +225,7 @@ touch SConscript --- -## InspectoGadget: SimObject Declaration File +## InspectorGadget: SimObject Declaration File Now, inside `InspectorGadget.py`, let's define `InspectorGadget` as a `ClockedObject`. To do that, we need to import `ClockedObject`. Do it by adding the following line to `InspectorGadget.py`. @@ -528,7 +528,7 @@ Let's take a deeper look into what we added for class `MemSidePort`. 1: Like `CPUSidePort`, a `MemSidePort` instance holds a pointer to its `owner` with `InspectorGadget* owner`. We do this to access the owner when we receive `responses`, i.e. when `recvTimingResp` is called. 2: When `MemSidePort::sendTimingReq` receives false, it means the request was blocked. We track a pointer to this blocked `Packet` in `PacketPtr blockedPacket` so that we can retry the request later. 3: Function `blocked` tells us if we are blocked by the memory side, i.e. still waiting to receive a `retry request` from memory side. -4: Function `sendPacket` is a wrapper around `sendTimingReq` to give our code more structure. Notice we don't need to definte `sendTimingReq` as it is already defined by `TimingRequestProtocol`. +4: Function `sendPacket` is a wrapper around `sendTimingReq` to give our code more structure. Notice we don't need to define `sendTimingReq` as it is already defined by `TimingRequestProtocol`. 5: We will need to implement all of the functions that relate to moving packets – the ones that start with `recv`. We will use `owner` to implement most of the functionality of these functions within `InspectorGadget`. --- @@ -861,7 +861,7 @@ InspectorGadget::recvFunctional(PacketPtr pkt) Looking at `recvAtomic`, this function returns a value of type `Tick`. This value is supposed to represent the latency of the access if that access was done in singularity, i.e atomically/without being interleaved. **CAUTION**: This latency is not an accurate representation of the actual latency of the access in a real setup. 
In a real setup there are many accesses happening at the same time and most of the time accesses do not happen atomically. -Let's add *one* cycle to the latency of accesses from the lower level of memory hierarchy. To do this we are going to call `clockPeriod` from the parent class of `InspectorGadget`, which is `ClockedObject`. This function returns the period of the `clk_domain` in `Ticks`. Add the following code to define of `InspectorGadget::recvAtomic` in `inspector_gadget.cc`. +Let's add *one* cycle to the latency of accesses from the lower level of memory hierarchy. To do this we are going to call `clockPeriod` from the parent class of `InspectorGadget`, which is `ClockedObject`. This function returns the period of the `clk_domain` in `Ticks`. Add the following code to define `InspectorGadget::recvAtomic` in `inspector_gadget.cc`. ```cpp Tick @@ -932,9 +932,9 @@ If you remember from [Event Driven Simulation](./03-event-driven-sim.md), we als ## Managing the Schedule of nextReqSendEvent -Now, that we have declared `nextReqSendEvent`, we can schedule `nextReqSendEvent` in `InspectorGadget::recvTimingReq`. We will see in a few slides why it is helpful to have a function that decides if and when `nextReqSendEvent` should be scheduled. +Now that we have declared `nextReqSendEvent`, we can schedule `nextReqSendEvent` in `InspectorGadget::recvTimingReq`. We will see in a few slides why it is helpful to have a function that decides if and when `nextReqSendEvent` should be scheduled. -What I do when I write `SimObjects` is that, for every `event`, I create a function to schedule that event. I name these functions with `schedule` prefixing the name of the event. Let's go ahead and a declare `scheduleNextReqSendEvent` under the `private` scope in `InspectorGadget`. +What I do when I write `SimObjects` is that, for every `event`, I create a function to schedule that event. I name these functions with `schedule` prefixing the name of the event. 
Let's go ahead and declare `scheduleNextReqSendEvent` under the `private` scope in `InspectorGadget`. Open `inspector_gadget.hh` and add the following lines: @@ -951,7 +951,7 @@ We'll see that one `event` might be scheduled in multiple locations in the code. ## Back to InspectorGadget::recvTimingReq -Now, we can finally go ahead and add a function call to `scheduleNextReqSendEvent` in `InspectorGadget::recvTimingReq`. Since we are assuming it will take **one** `cycle` to insert an item to `inspectionBuffer`, we're going to pass `nextCycle()` as `when` argument. +Now, we can finally go ahead and add a function call to `scheduleNextReqSendEvent` in `InspectorGadget::recvTimingReq`. Since we are assuming it will take **one** `cycle` to insert an item to `inspectionBuffer`, we're going to pass `nextCycle()` as the `when` argument. This is how `InspectorGadget::recvTimingReq` should look after all the changes. @@ -1028,7 +1028,7 @@ Here are a few things to note about `processNextReqSendEvent`: > Let's take a step back... -Are we done with `cpuSidePort` yet? If we look at `InspectorGadget::recvTimingReq`, we return false when there is not enough space in `inspectionBuffer`. Also, if you remember, if the `reponsder` (in our case `InspectorGadget`) rejects a `request` because it's busy (in our case because we don't have enough space in `inspectionBuffer`), the `responder` has to send a `request retry` when it becomes available (in our case, when there is room freed in `inspectionBuffer`). So let's go ahead and send a `request retry` to the `peer` of `cpuSidePort`. We need to send that retry **one cycle later**. So, we need another event for that. Let's go ahead and add it. +Are we done with `cpuSidePort` yet? If we look at `InspectorGadget::recvTimingReq`, we return false when there is not enough space in `inspectionBuffer`. 
Also, if you remember, if the `responder` (in our case `InspectorGadget`) rejects a `request` because it's busy (in our case because we don't have enough space in `inspectionBuffer`), the `responder` has to send a `request retry` when it becomes available (in our case, when there is room freed in `inspectionBuffer`). So let's go ahead and send a `request retry` to the `peer` of `cpuSidePort`. We need to send that retry **one cycle later**. So, we need another event for that. Let's go ahead and add it. --- @@ -1067,7 +1067,7 @@ InspectorGadget::scheduleNextReqRetryEvent(Tick when) ## Back to processNextReqSendEvent -Now all that is left to do in `processNextReqSendEvent` is to try scheduling `nextReqRetry` for `nextCycle` after we have sent a `Packet`. Let's go ahead and add that our code. This is how `processNextReqSendEvent` should look like after these changes: +Now all that is left to do in `processNextReqSendEvent` is to try scheduling `nextReqRetry` for `nextCycle` after we have sent a `Packet`. Let's go ahead and add that to our code. This is how `processNextReqSendEvent` should look after these changes: ```cpp void @@ -1122,7 +1122,7 @@ Make sure to add the following include statement as well since we're using `std: We're almost done with defining the whole `request` path. The only thing that remains is to react to `request retries` we receive from the `peer` of `memSidePort`. -Since we tracked the last `Packet` that we have tried to send, we can simply try sending that packet again. Let's consider the following for this function: +Since we tracked the last `Packet` that we tried to send, we can simply try sending that packet again. Let's consider the following for this function: 1: We shouldn't receive a `request retry` if we're not blocked. 2: For now, let's accept that there might be scenarios when a `request retry` will arrive but when we try to send `blockedPacket`, it will be rejected again. 
So let's account for that when writing `MemSidePort::recvReqRetry`. @@ -1133,7 +1133,7 @@ Since we tracked the last `Packet` that we have tried to send, we can simply try ## MemSidePort::recvReqRetry cont. -Add the following code to `inspector_gadget.cc` to define `MemSidePort::recvReqRetry` +Add the following code to `inspector_gadget.cc` to define `MemSidePort::recvReqRetry`. ```cpp void @@ -1252,7 +1252,7 @@ To find the definition for all these functions please look at the [complete vers ## InspectorGadget::InspectorGadget -Now, what we have to do is define the constructor of `InspectorGadget`. To do it add the following code to `inspector_gadget.cc`: +Now we have to define the constructor of `InspectorGadget`. To do it add the following code to `inspector_gadget.cc`: ```cpp InspectorGadget::InspectorGadget(const InspectorGadgetParams& params): @@ -1275,7 +1275,7 @@ InspectorGadget::InspectorGadget(const InspectorGadgetParams& params): ## SimObject::init -Last step before compilation is to define the `init` function. Since `InspectorGadget` is a `Responder` object, the convention is to let `peer` ports know that they can ask for their address range when the ranges become known. `init` is a `virtual` and `public` function from `SimObject`. Let's go ahead and declare it to override it. To do this, add the following declaration to the `public` scope of `InspectorGadget` in `inspector-gadget.hh`. +The last step before compilation is to define the `init` function. Since `InspectorGadget` is a `Responder` object, the convention is to let `peer` ports know that they can ask for their address range when the ranges become known. `init` is a `virtual` and `public` function from `SimObject`. Let's go ahead and declare it to override it. To do this, add the following declaration to the `public` scope of `InspectorGadget` in `inspector-gadget.hh`. 
```cpp virtual void init() override; @@ -1397,7 +1397,7 @@ class InspectedMemory(ChanneledMemory): ## first-inspector-gadget-example.py -Now, let's just simply add the following imports to `gem5/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py`: +Now, let's add the following imports to `gem5/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py`: ```python from components.inspected_memory import InspectedMemory @@ -1436,9 +1436,9 @@ In the next slide, there is a recording of my terminal when running the command In this step, we see how to add statistics to our `SimObjects` so that we can measure things with them. For now let's add statistics to measure the following. 1- The sum of the queueing latency in `inspectionBuffer` experienced by each `Packet`. Let's use the name `totalInspectionBufferLatency` for this statistic. -2- Total number of `requests` forwarded. Let'use the name `numRequestsFwded`. +2- Total number of `requests` forwarded. Let's use the name `numRequestsFwded`. 3- The sum of the queueing latency in `responseBuffer` experienced by each `Packet`. Let's use the name `totalResponseBufferLatency` for this statistic. -4- Total number of `requests` forwarded. Let'use the name `numResponsesFwded`. +4- Total number of `requests` forwarded. Let's use the name `numResponsesFwded`. --- @@ -1524,7 +1524,7 @@ InspectorGadget::processNextRespSendEvent() ## Measuring Queueing Latencies -To measure the queueing latency in `inspectionBuffer` and `responseBuffer` we need to track the time each `Packet` is inserted in these buffers as well the time they are removed. We already track the insertion time for each `Packet`. We only need to make it accessible from the outside. We can use `curTick()` in `processNextReqSendEvent` and `processNextRespSendEvent` to track the time each `Packet` is removed from `inspectionBuffer` and `responseBuffer` respectively. 
+To measure the queueing latency in `inspectionBuffer` and `responseBuffer` we need to track the time at which each `Packet` is inserted in these buffers as well the time they are removed. We already track the insertion time for each `Packet`. We only need to make it accessible from the outside. We can use `curTick()` in `processNextReqSendEvent` and `processNextRespSendEvent` to track the time each `Packet` is removed from `inspectionBuffer` and `responseBuffer` respectively. Let's go ahead an add the following function inside the `public` scope of `TimedQueue`. @@ -1538,7 +1538,7 @@ Let's go ahead an add the following function inside the `public` scope of `Timed ## Measuring Queueing Latencies cont. -This is how `processNextReqSendEvent`, `processNextRespSendEvent` would look for measuring all statistics. +This is how `processNextReqSendEvent` and `processNextRespSendEvent` look for measuring all statistics. ```cpp void @@ -1577,7 +1577,8 @@ Now, let's go ahead and run the simulation again. We don't need to make any chan ./build/NULL/gem5.opt configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py ``` -Now if you search for the name of the stats we added in `m5out/stats.txt`. This is what we will see. **NOTE**: I did by searching for the name of the `InspectorGadget` objects in the file using `grep inspectors m5out/stats.txt` in the base gem5 directory. +This is what we will see if we search for the name of the stats we added in `m5out/stats.txt`. +**NOTE**: I searched for the name of the `InspectorGadget` objects in the file using `grep inspectors m5out/stats.txt` in the base gem5 directory. ```sh system.memory.inspectors0.totalInspectionBufferLatency 7334 # Total inspection buffer latency. (Tick) @@ -1607,7 +1608,7 @@ system.memory.inspectors1.numResponsesFwded 18 # In this step, we're going to add an *inspection* step to the process of forwarding requests that we receive. You'll see that we will **not** create any class that models the inspection. 
For the purposes of this tutorial, the process of *inspection* is completely trivial; we just care about its latency. In this step, let's just assume that inspection takes `1 cycle` to inspect the `request`. -This is how the `request path` will look like after our changes in this step. +This is how the `request path` will look after our changes in this step. `CPUSidePort.recvTimingReq->InspectorGadget.recvTimingReq->[inspection]->InspectorGadget.processNextReqSendEvent->MemSidePort.sendPacket` @@ -1660,7 +1661,7 @@ Let's get rid of the easy things first. Add the lines below to the initializatio ### Changing the Request Path: InspectorGadget::recvTimingReq -The next thing we need to is schedule `nextInspectionEvent` in `InspectorGadget::recvTimingReq`. Currently, we schedule `nextReqSendEvent` in `InspectorGadget::recvTimingReq`. This is how the code in `src/bootcamp/inspector-gadget/inspector_gadget.cc` looks like right now (before our changes). +The next thing we need to is schedule `nextInspectionEvent` in `InspectorGadget::recvTimingReq`. Currently, we schedule `nextReqSendEvent` in `InspectorGadget::recvTimingReq`. This is how the code in `src/bootcamp/inspector-gadget/inspector_gadget.cc` looks right now (before our changes). ```cpp bool @@ -1677,7 +1678,7 @@ InspectorGadget::recvTimingReq(PacketPtr pkt) ### -We will just simply replace `scheduleNextReqSendEvent(nextCycle());` with `scheduleNextInspectionEvent(nextCycle());` in this function. This is how the function should look like after the changes. +We will just simply replace `scheduleNextReqSendEvent(nextCycle());` with `scheduleNextInspectionEvent(nextCycle());` in this function. This is how the function should look after the changes. ```cpp bool @@ -1697,7 +1698,7 @@ InspectorGadget::recvTimingReq(PacketPtr pkt) ## Changing the Request Path: InspectorGadget::scheduleNextInspectionEvent -Let's take a step back. 
Each *inspection* takes a `request` from `inspectionBuffer`, *inspects* the `request` and puts it in `outputBuffer`. To determine whether `nextInspectionEvent` has to be scheduled we need to check **a**) if there is a `Packet` in `inspectionBuffer` and **b**) if there is at least one empty entry in `outputBuffer`. If both conditions are satisfied, we need to calculate the right time it should be scheduled for like we have been doing it already. +Let's take a step back. Each *inspection* takes a `request` from `inspectionBuffer`, *inspects* the `request` and puts it in `outputBuffer`. To determine whether `nextInspectionEvent` has to be scheduled we need to check **a**) if there is a `Packet` in `inspectionBuffer` and **b**) if there is at least one empty entry in `outputBuffer`. If both conditions are satisfied, we need to calculate the right time it should be scheduled for, like we have been doing already. Add the following code to `src/bootcamp/inspector-gadget/inspector_gadget.cc` under `namespace gem5` to define `InspectorGadget::scheduleNextInspectionEvent`. @@ -1748,7 +1749,7 @@ Now, let's declare a new class that inherits from `Packet::SenderState`. Let's d ## InspectorGadget::inspectRequest: Declaring Additional Members -Now, let's go ahead and declare and define a function that does the inspection for us. To count the number of displacements we need to keep track of the next *sequence number* we expect. We need to increment this variable every time we receive a response. In addition, we need to generate new *sequence numbers* as well. Therefore, we need to keep track of the next available *sequence number*. Lastly, we need to count the number of displacement in a variable. Let's add that to `InspectorGadgetStats`. Add the following lines under the `private` scope of `InspectorGadgetStats` in `src/bootcamp/inspector-gadget/inspector_gadget.hh`. +Now, let's go ahead and declare and define a function that does the inspection for us. 
To count the number of displacements, we need to keep track of the next *sequence number* we expect. We need to increment this variable every time we receive a response. In addition, we need to generate new *sequence numbers* as well. Therefore, we need to keep track of the next available *sequence number*. Lastly, we need to count the number of displacements in a variable. Let's add that to `InspectorGadgetStats`. Add the following lines under the `private` scope of `InspectorGadgetStats` in `src/bootcamp/inspector-gadget/inspector_gadget.hh`. ```cpp private: @@ -1787,7 +1788,7 @@ Add the following line to the initialization list in `InspectorGadget::Inspector ## InspectorGadget::inspectRequest -Now, let's go ahead an declare a function that *inspects* `requests` as they are popped from `inspectionBuffer`. To do this, add the following line under the `private` scope of `InspectorGadget` in `src/bootcamp/inspector-gadget/inspector-gadget.hh`. +Now, let's go ahead and declare a function that *inspects* `requests` as they are popped from `inspectionBuffer`. To do this, add the following line under the `private` scope of `InspectorGadget` in `src/bootcamp/inspector-gadget/inspector-gadget.hh`. ```cpp private: @@ -1817,7 +1818,7 @@ Now, we need to define the callback function for `nextInspectionEvent`. To simulate inspection and its latency, we need to pop the first item in `inspectionBuffer`, *inspect* it and push it in the `outputBuffer` for it to be sent to the memory. Then we can schedule `nextReqSendEvent` for `nextCycle`. -Now, since `processNextInspectionEvent` is popping items off of `inspectionBuffer`, it now becomes responsible for sending `retry requests` from `cpuSidePort`. This means we need to schedule `nextReqRetryEvent` for `nextCycle` as well. +Since `processNextInspectionEvent` is popping items off of `inspectionBuffer`, it now becomes responsible for sending `retry requests` from `cpuSidePort`. 
This means we need to schedule `nextReqRetryEvent` for `nextCycle` as well. We also need to schedule `nextInspectionEvent` for `nextCycle`. We discussed why before. So far we have added `nextInspectionEvent` after `recvTimingReq`. In the next slides, we change `nextReqSendEvent` accordingly. @@ -1856,7 +1857,7 @@ Now that we have add `nextInspectionEvent` and `outputBuffer`, we need to change What we are going to change is that `nextReqSendEvent` is going to pop items off of `outputBuffer` instead of `inspectionBuffer`. -This is how `scheduleNextReqSendEvent` in `src/bootcamp/inspector-gadget/inspector_gadget.cc` looks like before the changes. +This is how `scheduleNextReqSendEvent` in `src/bootcamp/inspector-gadget/inspector_gadget.cc` looks before the changes. ```cpp void @@ -1872,7 +1873,7 @@ InspectorGadget::scheduleNextReqSendEvent(Tick when) } ``` -This is how it should look like after the changes. +This is how it should look after the changes. ```cpp void @@ -1900,7 +1901,7 @@ Now, let's go ahead and change the definition of `processNextReqSendEvent`. We n 3- Remove scheduling of `nextReqRetryEvent`. 4- Remove measuring of `totalInspectionBufferLatency`. -This is how this function looks like right now. +This is how this function looks right now. ```cpp void @@ -1921,7 +1922,7 @@ InspectorGadget::processNextReqSendEvent() } ``` -This is how it should look like after the changes. +This is how it should look after the changes. ```cpp void @@ -1974,10 +1975,10 @@ InspectorGadget::inspectResponse(PacketPtr pkt) ``` --- - + ## InspectorGadget::processNextRespSendEvent -Now, let's go ahead and call `inspectResponse` in the response path. Do it by adding a function call to `inspectResponse` in `processNextRespSendEvent`. This is how the function should look like after the changes. +Now, let's go ahead and call `inspectResponse` in the response path. Do it by adding a function call to `inspectResponse` in `processNextRespSendEvent`. 
This is how the function should look after the changes. ```cpp void @@ -2014,7 +2015,7 @@ scons build/NULL/gem5.opt -j$(nproc) Now that we have added a new parameter to class `InspectorGadget`, let's go ahead and extend `InspectedMemory` to expose `output_buffer_entries` as an argument to its constructor (`__init__`). -Open `configs/bootcamp/inspector-gadget/components/inspected_memory.py` and make the appropriate changes. You only need to change `InspecteMemory.__init__`. This is how the function should look like after the changes. +Open `configs/bootcamp/inspector-gadget/components/inspected_memory.py` and make the appropriate changes. You only need to change `InspectedMemory.__init__`. This is how the function should look after the changes. ```python class InspectedMemory(ChanneledMemory): @@ -2055,7 +2056,7 @@ Now, let's remove `data_limit` when configuring `HybridGenerator` in `configs/in ```python generator = HybridGenerator( num_cores=6, - rate="1GB/s", + rate="1GiB/s", duration="1ms", ) ``` @@ -2073,7 +2074,7 @@ system.memory.inspectors0.numReqRespDisplacements 14 system.memory.inspectors1.numReqRespDisplacements 6 # Number of request-response displacements. (Count) ``` -**PRACTICE**: Add as stat for `totalOutputBufferLatency`. +**PRACTICE**: Add a stat for `totalOutputBufferLatency`. --- @@ -2156,7 +2157,7 @@ Now, let's add the following lines to the initialization list in `InspectorGadge Now, we need to account for inspection units being available when scheduling `nextInspectionEvent`. To do this we will need to find the first available inspection unit. We should not schedule `nextInspectionEvent` earlier than that. -Let's go ahead and change `scheduleNextInspectionEvent` in `src/bootcamp/inspector-gadget/inspector_gadget.cc`. This is how the function should look like after the changes. +Let's go ahead and change `scheduleNextInspectionEvent` in `src/bootcamp/inspector-gadget/inspector_gadget.cc`. This is how the function should look after the changes. 
```cpp void @@ -2238,7 +2239,7 @@ Let's take a deeper look at how we process `nextInspectionEvent`. It's important 1- We want to impose a limit on the number of entries at the front we will look at each time we process `nextInspectionEvent`. So we keep a copy of `inspectionWindow` and decrement each time we inspect a `request`. We break out of the loop as soon as `insp_window_left == 0` 2- Iterations of the for-loop represent assignment of inspection to one inspection unit. Therefore, we need to check the following: **a**) if the inspection unit is available at `curTick`, **b**) if `inspectionBuffer` has items, and **c**) if there is an entry available in `outputBuffer`. We skip the inspection units that are busy and will break out of the loop when **b** or **c** do not hold. -3- If every check has passed, we can now do the actual inspection and impose the latency. Now that we have a parameter for the latency of inspection, we will push `pkt` with a timestamp that is equivalent to `curTick + totalInspectionLatency` (this is what `clockEdge(totalInspectionLatency)` returns). We also need to make the inspection unit that we just assigned work to until the same time. +3- If every check has passed, we can now do the actual inspection and impose the latency. Now that we have a parameter for the latency of inspection, we will push `pkt` with a timestamp that is equivalent to `curTick + totalInspectionLatency` (this is what `clockEdge(totalInspectionLatency)` returns). We also need to make the inspection unit that we just assigned work until the same time. 4- After all the assignment of work is done, we need to make sure that the available time of all inspection units are bigger than or equal to `nextCycle`. Why? 
--- diff --git a/slides/03-Developing-gem5-models/05-modeling-cores.md b/slides/03-Developing-gem5-models/05-modeling-cores.md index db5b13d..86f4b52 100644 --- a/slides/03-Developing-gem5-models/05-modeling-cores.md +++ b/slides/03-Developing-gem5-models/05-modeling-cores.md @@ -88,7 +88,7 @@ It is constructed from information in the `StaticInst` object. It contains information on: -- PC and predicated next-PC +- PC and predicted next-PC - Instruction result - Thread number - CPU @@ -342,7 +342,7 @@ Then follows through to: ``` This is the function that calls the `execute` function of the `StaticInst` object which will carry out all the work for the instruction. -**Note:** This is because `Add` non-memory instruction. Memory instructions are immediately executed. Without memory accesses instructions are simulated as being instantaneous. +**Note:** This is because `Add` is a non-memory instruction. Memory instructions are immediately executed. Without memory accesses instructions are simulated as being instantaneous. --- @@ -368,7 +368,7 @@ The latter two functions are used for memory instructions such as Timed memory a ## The gem5 ISA Parser -So far we've seen, how an instruction is decoded then executed in gem5. +So far we've seen how an instruction is decoded then executed in gem5. However, we haven't seen how this decoding process is defined and the behavior of the instrucion's execution is defined. This is where it gets complicated... @@ -397,13 +397,13 @@ The painful truth is that to extend or add to an ISA most developers will `grep` ## Let's try to understand one RISC-V instruction -In the following we are going to look at the `LW` instruction in the RISC-V and how it is specified, decoded, and executed in gem5. +In the following we are going to look at the `LW` instruction in RISC-V and how it is specified, decoded, and executed in gem5. 
--- ## The RISC-V instruction formats -To understand the RISC-V ISA, and how the gem5 RISC-V decoder works, we need to understand the base instruction formats. +To understand the RISC-V ISA and how the gem5 RISC-V decoder works, we need to understand the base instruction formats. The base instruction formats are the R, I, S, B, U, and J types which use the following formats: ![55% bg](05-modeling-cores-img/riscv-32bit-inst-format.png) @@ -484,7 +484,7 @@ The decoder uses these bitfields to decode the instruction. --- -Go to "decoder.isa" and search for the `lw` instruction +Go to [decoder.isa](../../gem5/src/arch/riscv/isa/decoder.isa) and search for the `lw` instruction. The following shows the path to the instruction definition via parsing of the instruction's `opcode` and `funct3` fields: @@ -511,7 +511,7 @@ decode QUADRANT default Unknown::unknown() { ### Generating code from the LW ISA definition -You can compare side by side decoder.isa and decode-method.cc.inc to see how the ISA definition is used to generate the CPP decoder code. +You can compare [decoder.isa](../../gem5/src/arch/riscv/isa/decoder.isa) and [decode-method.cc.inc](../../materials/03-Developing-gem5-models/05-modeling-cores/build-riscv-generated-files/decode-method.cc.inc) side by side to see how the ISA definition is used to generate the CPP decoder code. This is done by the ISA parser script (isa_parser.py) which is used by the gem5 build system to generate the CPP code. 
@@ -637,7 +637,7 @@ You can follow this through to see how this constructor is generated but it's a --- -From "decoder-ns.hh.inc", you can see the generated class definition for the `Lw` instruction: +From [decoder-ns.hh.inc](../../materials/03-Developing-gem5-models/05-modeling-cores/build-riscv-generated-files/decoder-ns.hh.inc), you can see the generated class definition for the `Lw` instruction: ```cpp class Lw : public Load @@ -701,7 +701,7 @@ ADD16, Rs1, Rs2 --- -Lets run the [materials/03-Developing-gem5-models/05-modeling-cores/02-add16-instruction](../../materials/03-Developing-gem5-models/05-modeling-cores/02-add16-instruction/add16_test.py) +Let's run the [materials/03-Developing-gem5-models/05-modeling-cores/02-add16-instruction](../../materials/03-Developing-gem5-models/05-modeling-cores/02-add16-instruction/add16_test.py). This file runs the binary for [add16_test.c](../../materials/03-Developing-gem5-models/05-modeling-cores/02-add16-instruction/src/add16_test.c). This is a C program that executes the `add 16` instruction. diff --git a/slides/03-Developing-gem5-models/06-modeling-cache-coherence.md b/slides/03-Developing-gem5-models/06-modeling-cache-coherence.md index eacc92a..f9cf077 100644 --- a/slides/03-Developing-gem5-models/06-modeling-cache-coherence.md +++ b/slides/03-Developing-gem5-models/06-modeling-cache-coherence.md @@ -74,9 +74,9 @@ Single-Writer Multiple-Reader (SWMR) invariant ## Ruby Components -- **Controller Models** *(e.g, caches)*: Manage coherence state and issue requests -- **Controller Topology** *(how the caches are connected)*: Determines how messages are routed -- **Interconnect Model** *(e.g., on-chip routers)*: Determines performance of routing +- **Controller Models** *(e.g., caches)*: Manage coherence state and issue requests. +- **Controller Topology** *(how the caches are connected)*: Determines how messages are routed. +- **Interconnect Model** *(e.g., on-chip routers)*: Determines performance of routing.
- **Interface** *(how to get messages in/out of Ruby)* > **Note**: The main goal of Ruby is ***flexibility***, not ***usability***. @@ -87,7 +87,7 @@ Single-Writer Multiple-Reader (SWMR) invariant - Implemented in "SLICC" - **S**pecification **L**anguage for **I**ncluding **C**ache **C**oherence -- SLICC is a domain-specific language +- SLICC is a domain-specific language. - Describes the coherence protocol - Generates C++ code - See `build/.../mem/ruby/protocol` for generated files (but you really don't want to read these.) @@ -124,15 +124,15 @@ Single-Writer Multiple-Reader (SWMR) invariant ## Cache state machine outline -- **Parameters**: These are the `SimObject` parameters (and some special things) - - **Cache memory**: Where the data is stored - - **Message buffers**: Sending and receiving messages from the network -- **State declarations**: The stable and transient states -- **Event declarations**: State machine events that will be "triggered" +- **Parameters**: These are the `SimObject` parameters (and some special things). + - **Cache memory**: Where the data is stored. + - **Message buffers**: Sending and receiving messages from the network. +- **State declarations**: The stable and transient states. +- **Event declarations**: State machine events that will be "triggered". - **Other structures and functions**: Entries, TBEs, get/setState, etc. -- **In ports**: Trigger events based on incoming messages -- **Actions**: Execute single operations on cache structures -- **Transitions**: Move from state to state and execute actions +- **In ports**: Trigger events based on incoming messages. +- **Actions**: Execute single operations on cache structures. +- **Transitions**: Move from state to state and execute actions. **In ports** read **Cache memory** then *trigger* **Events**. **Events** cause **Transitions** based on the **State** which execute **Actions**. 
@@ -142,10 +142,10 @@ Single-Writer Multiple-Reader (SWMR) invariant ## Cache memory -- See `src/mem/ruby/structures/CacheMemory` -- Stores the cache data (in an `Entry` as defined in the SLICC file) -- Can use the function `cacheProbe()` to get the replacement address when a cache miss occurs - - Interacts with replacement policies in `src/mem/cache/replacement_policies` +- See `src/mem/ruby/structures/CacheMemory`. +- Stores the cache data (in an `Entry` as defined in the SLICC file). +- Can use the function `cacheProbe()` to get the replacement address when a cache miss occurs. + - Interacts with replacement policies in `src/mem/cache/replacement_policies`. > **IMPORTANT**: Always call `setMRU()` when you access an `Entry` otherwise the replacement policy won't work. @@ -153,6 +153,8 @@ Single-Writer Multiple-Reader (SWMR) invariant --- + + ## Message buffers ```c++ @@ -164,11 +166,11 @@ MessageBuffer * forwardFromDir, network="From", virtual_network="1", vnet_type=" - The to/from declares them as either "in_port" type or "out_port" type. - Virtual network is required when some messages have higher priority than others. - `vnet_type` is the message type. "Response" means that the message carries data and is used in Garnet for counting buffer credits. -- Message buffers have the following interface - - `peek()`: Get the head message +- Message buffers have the following interface: + - `peek()`: Get the head message. - `pop()`: Remove the head message (Don't forget this or you'll have deadlock!) - - `isReady()`: Check if there is a message to read - - `recycle()`: Take the head message and put it on the tail (useful to get blocking messages out of the way) + - `isReady()`: Check if there is a message to read. + - `recycle()`: Take the head message and put it on the tail (useful to get blocking messages out of the way). - `stallAndWait()`: Move the head message to a separate queue (don't forget to call `wakeUpDependents()` later!) 
--- @@ -197,11 +199,11 @@ cp -r materials/03-Developing-gem5-models/06-modeling-cache-coherence/MyMSI* gem ## Declaring a protocol -Modify [`src/mem/ruby/protocol/MyMSI.slicc`](../../gem5/src/mem/ruby/protocol/MyMSI.slicc) +Modify [`src/mem/ruby/protocol/MyMSI.slicc`](../../gem5/src/mem/ruby/protocol/MyMSI.slicc). -- Need to tell Scons about the state machine files -- In a file called `.slicc` -- You can use the same state machine (`.sm`) files for multiple protocols +- Need to tell Scons about the state machine files. +- In a file called `.slicc`. +- You can use the same state machine (`.sm`) files for multiple protocols. - Usually, you want to do this in the [`src/mem/ruby/protocol`](../../gem5/src/mem/ruby/protocol/) directory. ```text @@ -219,7 +221,7 @@ include "MyMSI-dir.sm"; ## Declaring the message types -Modify [`src/mem/ruby/protocol/MyMSI-msg.sm`](../../gem5/src/mem/ruby/protocol/MyMSI-msg.sm) +Modify [`src/mem/ruby/protocol/MyMSI-msg.sm`](../../gem5/src/mem/ruby/protocol/MyMSI-msg.sm). ```c++ enumeration(CoherenceRequestType, desc="Types of request messages") { @@ -239,7 +241,7 @@ enumeration(CoherenceResponseType, desc="Types of response messages") { ## Message buffers for the directory -Modify [`src/mem/ruby/protocol/MyMSI-dir.sm`](../../gem5/src/mem/ruby/protocol/MyMSI-dir.sm) +Modify [`src/mem/ruby/protocol/MyMSI-dir.sm`](../../gem5/src/mem/ruby/protocol/MyMSI-dir.sm). ```c++ // Forwarding requests from the directory *to* the caches. @@ -311,7 +313,7 @@ build/ALL_MyMSI/gem5.opt configs/learning_gem5/part3/simple_ruby.py ``` While we're waiting on the compilation, let's look at some of the details of the code. -(It is way too much code to write all yourself today... so let's just read it) +(It is way too much code to write all yourself today... so let's just read it). 
--- @@ -319,7 +321,7 @@ While we're waiting on the compilation, let's look at some of the details of the ## Let's look at some code: In-port definition -From [`gem5/src/learning_gem5/part3/MSI-cache.sm`](../../gem5/src/learning_gem5/part3/MSI-cache.sm) +From [`gem5/src/learning_gem5/part3/MSI-cache.sm`](../../gem5/src/learning_gem5/part3/MSI-cache.sm). ```c++ in_port(mandatory_in, RubyRequest, mandatoryQueue) { @@ -354,7 +356,7 @@ in_port(mandatory_in, RubyRequest, mandatoryQueue) { ## State declarations -See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm) +See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm). ```c++ state_declaration(State, desc="Cache states") { @@ -369,14 +371,14 @@ state_declaration(State, desc="Cache states") { } ``` -**`AccessPermission:...`**: Used for functional accesses -**`IS_D`**: Invalid, waiting for data to move to shared +**`AccessPermission:...`**: Used for functional accesses. +**`IS_D`**: Invalid, waiting for data to move to shared. --- ## Event declarations -See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm) +See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm). ```c++ enumeration(Event, desc="Cache events") { @@ -398,16 +400,16 @@ enumeration(Event, desc="Cache events") { ## Other structures and functions -See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm) +See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol/MSI-cache.sm). -- **Entry**: Declare the data structure for each entry +- **Entry**: Declare the data structure for each entry. 
- Block data, block state, sometimes others (e.g., tokens) - **TBE/TBETable**: Transient Buffer Entry - - Like an MSHR, but not exactly (allocated more often) - - Holds data for blocks in transient states + - Like an MSHR, but not exactly (allocated more often). + - Holds data for blocks in transient states. - **get/set State, AccessPermissions, functional read/write** - - Required to implement AbstractController - - Usually just copy-paste from examples + - Required to implement AbstractController. + - Usually just copy-paste from examples. --- @@ -415,7 +417,7 @@ See [`gem5/src/mem/ruby/protocol/MSI-cache.sm`](../../gem5/src/mem/ruby/protocol Not gem5 ports! -- **out_port**: "Rename" the message buffer and declare message type +- **out_port**: "Rename" the message buffer and declare message type. - **in_port**: Much of the SLICC "magic" here. - Called every cycle - Look at head message @@ -461,10 +463,10 @@ action(sendGetM, "gM", desc="Send GetM to the directory") { } ``` -**`enqueue`** is like `peek`, but it automatically populates `out_msg` +**`enqueue`** is like `peek`, but it automatically populates `out_msg`. Some variables are implicit in actions. These are passed in via `trigger()` in `in_port`. -These are `address`, `cache_entry`, `tbe` +These are `address`, `cache_entry`, `tbe`. --- @@ -483,9 +485,9 @@ transition({IM_AD, SM_AD}, {DataDirNoAcks, DataOwner}, M) { } ``` -- **`(I, Store, IM_AD)`**: From state `I` on event `Store` to state `IM_AD` -- **`({IM_AD, SM_AD}, {DataDirNoAcks, DataOwner}, M)`**: From either `IM_AD` or `SM_AD` on either `DataDirNoAcks` or `DataOwner` to state `M` -- Almost always `pop` at the end +- **`(I, Store, IM_AD)`**: From state `I` on event `Store` to state `IM_AD`. +- **`({IM_AD, SM_AD}, {DataDirNoAcks, DataOwner}, M)`**: From either `IM_AD` or `SM_AD` on either `DataDirNoAcks` or `DataOwner` to state `M`. +- Almost always `pop` at the end. - Don't forget to use stats! --- @@ -504,7 +506,7 @@ You will: 4. Test the protocol 5. 
Find a bug 6. Fix the bug -7. Test with the ruby random tester +7. Test with the Ruby random tester --- @@ -529,13 +531,13 @@ system.caches.controllers0 time: 73 addr: 0x9100 event: DataDirNoAcks state: IS_ build/ALL_MyMSI/gem5.opt --debug-flags=ProtocolTrace configs/learning_gem5/part3/simple_ruby.py ``` -Start fixing the errors and fill in the `MyMSI-cache.sm` +Start fixing the errors and fill in the `MyMSI-cache.sm`. --- ## Fixing the errors: Missing transition -- Missing IS_D transition in cache +- Missing IS_D transition in cache. - write the data to the cache - deallocate the TBE - mark that this is an "external load hit" @@ -554,10 +556,10 @@ transition(IS_D, {DataDirNoAcks, DataOwner}, S) { ## Fixing the errors: Missing action -- Fill in the "write data to cache" action +- Fill in the "write data to cache" action. - Get the data out of the message (how to get the message?) - Set the cache entry's data (how? where does `cache_entry` come from?) - - Make sure to have `assert(is_valid(cache_entry))` + - Make sure to have `assert(is_valid(cache_entry))`. ```c++ action(writeDataToCache, "wd", desc="Write data to the cache") { @@ -624,8 +626,8 @@ build/ALL_MyMSI/gem5.opt --debug-flags=ProtocolTrace configs/learning_gem5/part3 - Wow! now it should be way faster to see the error! - Now, you need to handle this in the cache! `transition(S, Inv, I)` - If you get an invalidate... - - Send an ack, let the CPU know that this line was invalidated, deallocate the block, pop the queue -- So, now, hmm, it looks like it works??? But here's still one more + - Send an ack, let the CPU know that this line was invalidated, deallocate the block, pop the queue. +- So, now, hmm, it looks like it works??? But here's still one more. - Some transitions are very rare: `transition(I, Store, IM_AD)` - Try varying the parameters of the tester (without `ProtocolTrace`!) to find a combination which triggers an error (100000 checks, 8 CPUs, 50ns memory...) - Now, you can fix the error! 
@@ -652,10 +654,9 @@ transition(I, Store,IM_AD) {} ``` -Run Scons and the Python script again +Run Scons and the Python script again. - ------- +--- @@ -695,9 +696,9 @@ build/ALL_MyMSI/gem5.opt --debug-flags=ProtocolTrace configs/learning_gem5/part3 ## Fixing the error: What to do on a store - Fix the next error (what to do on a store??) - - Allocate a block, allocate a TBE, send a message, pop the queue + - Allocate a block, allocate a TBE, send a message, pop the queue. - Also make sure that all actions that you need - - When sending, you need to construct a new message. See `RequestMsg` in `MyMSI-msg.sm` + - When sending, you need to construct a new message. See `RequestMsg` in `MyMSI-msg.sm`. ```c++ action(sendGetM, "gM", desc="Send GetM to the directory") { @@ -713,7 +714,7 @@ build/ALL_MyMSI/gem5.opt --debug-flags=ProtocolTrace configs/learning_gem5/part3 } ``` -Run Scons and Python script +Run Scons and Python script. --- @@ -747,7 +748,7 @@ build/ALL_MyMSI/gem5.opt configs/learning_gem5/part3/ruby_test.py ## Now that it's working... look at the stats -Re-run the simple pthread test and lets look at some stats! +Re-run the simple Python test and lets look at some stats! ```sh build/ALL_MyMSI/gem5.opt configs/learning_gem5/part3/simple_ruby.py @@ -774,23 +775,23 @@ build/ALL_MyMSI/gem5.opt configs/learning_gem5/part3/simple_ruby.py `grep RequestTypeMachineType.ST.L1Cache.miss_type_mach_latency_hist_seqr::mean m5out/stats.txt` 18 `grep RequestTypeMachineType.LD.L1Cache.miss_type_mach_latency_hist_seqr::mean` -- multiply by sample size (...::sample) and then add together +- multiply by sample size (...::sample) and then add together. --- ## Ruby config scripts - Don't follow gem5 style closely :( -- Require lots of boilerplate -- Standard Library does a much better job +- Require lots of boilerplate. +- Standard Library does a much better job. ### What's needed in these scripts? 1. 
Instantiate the controllers -Here is where you pass all of the parameters to the `.sm` files -2. Create a `Sequencer` for each CPU (and DMA, etc.) -More details in a moment -3. Create and connect all of the network routers +Here is where you pass all of the parameters to the `.sm` files. +2. Create a `Sequencer` for each CPU (and DMA, etc.). +More details in a moment. +3. Create and connect all of the network routers. --- @@ -798,9 +799,9 @@ More details in a moment - You can connect the routers any way you like: - Mesh, torus, ring, crossbar, dragonfly, etc. -- Usually hidden in `create_topology` (see configs/topologies) - - Problem: These make assumptions about controllers - - Inappropriate for non-default protocols +- Usually hidden in `create_topology` (see configs/topologies). + - Problem: These make assumptions about controllers. + - Inappropriate for non-default protocols. After creating the topology (before simulation), Ruby's network model will find all of the valid paths from one node to another in the on-chip network. Thus, the OCN is completely separate from the types of controllers and the protocol. @@ -822,11 +823,11 @@ for ri in self.routers: self.int_links.append(SimpleIntLink(link_id = link_count, src_node = ri, dst_node = rj)) ``` -- **`self.routers`**: One router per controller in this case of point-to-point - - Must have a router for "internal" links -- **`self.ext_links`**: Connects the controller to the router - - You can have multiple external links per router, but not for this point-to-point example -- **`self.int_links`**: Connects the routers to each other +- **`self.routers`**: One router per controller in this case of point-to-point. + - Must have a router for "internal" links. +- **`self.ext_links`**: Connects the controller to the router. + - You can have multiple external links per router, but not for this point-to-point example. +- **`self.int_links`**: Connects the routers to each other. 
--- diff --git a/slides/03-Developing-gem5-models/07-chi-protocol.md b/slides/03-Developing-gem5-models/07-chi-protocol.md index 3e8fcd2..f771e0c 100644 --- a/slides/03-Developing-gem5-models/07-chi-protocol.md +++ b/slides/03-Developing-gem5-models/07-chi-protocol.md @@ -13,7 +13,7 @@ title: Using gem5's implementation of the CHI Protocol ## Example -- Let's build a simple two-level cache hiearchy +- Let's build a simple two-level cache hierarchy. - Private L1 caches - Shared L2/directory (home node) @@ -25,8 +25,8 @@ Code in [`materials/03-Developing-gem5-models/07-chi-protocol`](../../materials/ ## Use some components -- There are some components already available for CHI - - Just a `private_l1_moesi_cache` for now +- There are some components already available for CHI. + - Just a `private_l1_moesi_cache` for now. - Point-to-point network See [`gem5/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py`](../../gem5/src/python/gem5/components/cachehierarchies/chi/nodes/private_l1_moesi_cache.py). @@ -86,7 +86,7 @@ self.allow_SD = True ## Set more CHI parameters -MOESI / Mostly inclusive for shared / Exclusive for unique +MOESI / Mostly inclusive for shared / Exclusive for unique. ```python self.alloc_on_seq_acc = False @@ -106,7 +106,7 @@ self.dealloc_backinv_shared = False ## Now, let's create the hierarchy -Set the parameters we care about (and ignore others) +Set the parameters we care about (and ignore others). ```python class PrivateL1SharedL2CacheHierarchy(AbstractRubyCacheHierarchy): @@ -145,7 +145,7 @@ def incorporate_cache(self, board): ## Next, let's create the run script -First, let's use the traffic generator. Put the following code in `run_test.py` +First, let's use the traffic generator. Put the following code in `run_test.py`. 
```python from hierarchy import PrivateL1SharedL2CacheHierarchy @@ -155,7 +155,7 @@ board = TestBoard( cache_hierarchy=PrivateL1SharedL2CacheHierarchy( l1_size="32KiB", l1_assoc=8, l2_size="2MiB", l2_assoc=16, ), - memory=SingleChannelDDR4_2400(size="2GB"), + memory=SingleChannelDDR4_2400(size="2GiB"), clk_freq="3GHz", ) sim = Simulator(board) @@ -170,7 +170,7 @@ sim.run() > gem5-chi run_test.py ``` -stats.txt +stats.txt: ```text simSeconds 0.001000 @@ -210,7 +210,7 @@ cache_hierarchy = PrivateL1SharedL2CacheHierarchy( gem5 run-is.py ``` -You should see the following output pretty quickly +You should see the following output pretty quickly. ```text ... @@ -235,12 +235,12 @@ board.processor.switch0.core.commitStats0.ipc 0.149605 We have an average miss latency of 185 cycles (lots of L2 misses!) and an IPC of 0.15. -### Note: This example has not been debugged and may have FS issues +### Note: This example has not been debugged and may have FS issues. --- ## Summary -- We've created a simple two-level cache hierarchy using the CHI protocol -- We've run a simple traffic generator and a full system simulation -- We've seen how to set up the CHI protocol in gem5 with the standard library +- We've created a simple two-level cache hierarchy using the CHI protocol. +- We've run a simple traffic generator and a full system simulation. +- We've seen how to set up the CHI protocol in gem5 with the standard library. 
diff --git a/slides/03-Developing-gem5-models/08-ruby-network.md b/slides/03-Developing-gem5-models/08-ruby-network.md index 4bda029..146d8a2 100644 --- a/slides/03-Developing-gem5-models/08-ruby-network.md +++ b/slides/03-Developing-gem5-models/08-ruby-network.md @@ -94,7 +94,7 @@ Building off of [CHI protocol](07-chi-protocol.md) ## Create the topology file -Open [../../materials/03-Developing-gem5-models/08-ruby-network/ring.py](../../materials/03-Developing-gem5-models/08-ruby-network/ring.py) +Open [../../materials/03-Developing-gem5-models/08-ruby-network/ring.py](../../materials/03-Developing-gem5-models/08-ruby-network/ring.py). Note: There are a lot off oddities in this code. Most of it, you'll just have to take my word for it... @@ -193,7 +193,7 @@ if dma_ctrls: ## Create internal links This is where we create our ring. -For something different, let's do a uni-directional ring. +For something different, let's do a unidirectional ring. ```python self.int_links = [ diff --git a/slides/03-Developing-gem5-models/09-extending-gem5-models.md b/slides/03-Developing-gem5-models/09-extending-gem5-models.md index 9ed036b..5ae6434 100644 --- a/slides/03-Developing-gem5-models/09-extending-gem5-models.md +++ b/slides/03-Developing-gem5-models/09-extending-gem5-models.md @@ -51,7 +51,7 @@ There are three components related to probe point in gem5: - Profiling a component without adding too much to the component's codebase - Making more flexible exit events - Tracking advance behaviors -- More +- And more! --- @@ -59,10 +59,10 @@ There are three components related to probe point in gem5: ## More about Probe Point -- Every SimObject has a ProbeManager -- The ProbeManager manages all registered ProbePoints and the connected ProbeListeners for the SimObject -- One ProbePoint can notify multiple ProbeListeners, and one ProbeListener can listen to multiple ProbePoints -- One ProbeListener can only attach to one SimObject +- Every SimObject has a ProbeManager. 
+- The ProbeManager manages all registered ProbePoints and the connected ProbeListeners for the SimObject. +- One ProbePoint can notify multiple ProbeListeners, and one ProbeListener can listen to multiple ProbePoints. +- One ProbeListener can only attach to one SimObject. ![](09-extending-gem5-models-imgs/probepoint-diagram.drawio.svg) @@ -70,10 +70,10 @@ There are three components related to probe point in gem5: ## How to use Probe Point? -1. Create a ProbePoint in a SimObject -2. Register the ProbePoint with the SimObject's ProbeManager -3. Create a ProbeListener -4. Connect the ProbeListener to the SimObject and register it with the SimObject's ProbeManager +1. Create a ProbePoint in a SimObject. +2. Register the ProbePoint with the SimObject's ProbeManager. +3. Create a ProbeListener. +4. Connect the ProbeListener to the SimObject and register it with the SimObject's ProbeManager. Let's try it with a simple example! @@ -83,13 +83,13 @@ Let's try it with a simple example! ### 01-local-inst-tracker -Currently, gem5 does not have a straight-forward method to raise an exit event after we execute (commit) a number of instructions for multi-core simulation. We can easily create one with Probe Point. We will start with creating a ProbeListener that listens to each core's `ppRetiredInsts` ProbePoint, then in `02-global-inst-tracker`, we will create a SimObject to manage all the ProbeListeners to raise an exit event after the simulation executes (commits) a number of instructions. +Currently, gem5 does not have a straightforward method to raise an exit event after we execute (commit) a number of instructions for multi-core simulation. We can easily create one with Probe Point. We will start with creating a ProbeListener that listens to each core's `ppRetiredInsts` ProbePoint, then in `02-global-inst-tracker`, we will create a SimObject to manage all the ProbeListeners to raise an exit event after the simulation executes (commits) a number of instructions. ### Goal -1. 
Create a ProbeListener called the local-instruction-tracker -2. Connect the ProbeListener to the BaseCPU and register our ProbeListener with the BaseCPU's ProbeManager -3. Run a simple simulation with the local-instruction-tracker +1. Create a ProbeListener called `local-instruction-tracker`. +2. Connect the ProbeListener to the BaseCPU and register our ProbeListener with the BaseCPU's ProbeManager. +3. Run a simple simulation with the `local-instruction-tracker`. --- @@ -113,6 +113,7 @@ In the `inst_tracker.hh` file, we need to include the headers and necessary libr ``` --- + ## 01-local-inst-tracker @@ -130,7 +131,7 @@ class LocalInstTracker : public ProbeListenerObject } ``` -Now, we have a constructor for the `LocalInstTracker` and a virtual function `regProbeListeners()`. The `regProbeListeners` is called automatically when the simulation starts. We will use it to attach to the ProbePoint. +Now, we have a constructor for the `LocalInstTracker` and a virtual function `regProbeListeners()`. The function `regProbeListeners` is called automatically when the simulation starts. We will use it to attach to the ProbePoint. --- @@ -139,9 +140,9 @@ Now, we have a constructor for the `LocalInstTracker` and a virtual function `re Our goal is to count the number of committed instructions for our attached core so we can listen to the `ppRetiredInsts` ProbePoint that already exists in the `BaseCPU` SimObject. Let's look at the `ppRetiredInsts` ProbePoint a bit. -It is a `PMU probe point` that as suggested in [src/cpu/base.hh](https://github.com/gem5/gem5/blob/stable/src/sim/probe/pmu.hh) that it will notify the listeners with a `uint64_t` variable. +It is a `PMU probe point` that will notify the listeners with a `uint64_t` variable, as suggested in [src/sim/probe/pmu.hh](https://github.com/gem5/gem5/blob/stable/src/sim/probe/pmu.hh) and [src/cpu/base.hh](https://github.com/gem5/gem5/blob/stable/src/cpu/base.hh). 
In [src/cpu/base.cc:379](https://github.com/gem5/gem5/blob/stable/src/cpu/base.cc#L379), we can see that it is registered to the `BaseCPU` SimObject's ProbeManager with the string `"RetiredInsts"`. All ProbePoints are registered with the ProbeManager with a unique string variable, so we can use this string later to attach our listeners to this ProbePoint. Lastly, we can find that this ProbePoint notifies its listeners with an integer `1` when there is an instruction committed in [src/cpu/base.cc:393](https://github.com/gem5/gem5/blob/stable/src/cpu/base.cc#L393). -Now that we know what ProbePoint we are targeting, we can set it up for our LocalInstTracker. +Now that we know what ProbePoint we are targeting, we can set it up for our `LocalInstTracker`. --- @@ -149,13 +150,13 @@ Now that we know what ProbePoint we are targeting, we can set it up for our Loca In the `inst_tracker.hh`, we need to add two things: -1. The type of argument we are going to receive from the ProbePoint. In our case here is a `uint64_t` variable +1. The type of argument we are going to receive from the ProbePoint. In our case it is a `uint64_t` variable. ```cpp typedef ProbeListenerArg LocalInstTrackerListener; ``` -2. We need to have a function to handle the notification from the ProbePoint. Since we are counting the number of instructions committed and wanting to exit when it reaches a certain threshold, let's also create two `uint64_t` variables for this purpose +2. We need to have a function to handle the notification from the ProbePoint. Since we are counting the number of instructions committed and want to exit when it reaches a certain threshold, let's also create two `uint64_t` variables for this purpose. ```cpp void checkPc(const uint64_t& inst); @@ -167,9 +168,9 @@ uint64_t instThreshold; ## 01-local-tracker -Here comes an optional part. The Probe Point tool allows dynamic attachment and detachment during the simulation. 
Therefore, we can create a way to start and stop listening for our LocalInstTracker. +Here comes an optional part. The Probe Point tool allows dynamic attachment and detachment during the simulation. Therefore, we can create a way to start and stop listening for our `LocalInstTracker`. -In the `inst_tracker.hh`, +In the `inst_tracker.hh`, add the following: ```cpp bool listening; @@ -184,7 +185,7 @@ void startListening() { ## 01-local-tracker -In the `inst_tracker.cc`, let's define the constructor fist +In `inst_tracker.cc`, let's define the constructor first. ```cpp LocalInstTracker::LocalInstTracker(const LocalInstTrackerParams &p) @@ -195,13 +196,13 @@ LocalInstTracker::LocalInstTracker(const LocalInstTrackerParams &p) {} ``` -This means that we initialize the `instCount` as 0, `instThreshold` with the parameter `inst_threshold`, and listening with the parameter `start_listening`. +This means that we initialize the `instCount` as 0, `instThreshold` with the parameter `inst_threshold`, and `listening` with the parameter `start_listening`. --- ## 01-local-tracker -Then, let's define the `regProbeListeners` function, which will be called automatically when the simulation starts, also as we defined above when `startListening` is called. +Then, let's define the `regProbeListeners` function, which will be called automatically when the simulation starts, and also when `startListening` is called, as we defined above. ```cpp void @@ -220,7 +221,7 @@ As we can see, it uses the `LocalInstTrackerListener` type that we defined earli ## 01-local-tracker -For our `checkPc` function, it should count the instruction committed, check if it reaches the threshold, then raises an exit event when it does. +For our `checkPc` function, it should count the instructions committed, check if it reaches the threshold, then raise an exit event when it does. ```cpp void @@ -237,7 +238,7 @@ The `exitSimLoopNow` will create an event immediately, with the string variable. 
+Lastly, let's define the `stopListening` function for dynamic detachment.
-Let's build gem5 again +Let's build gem5 again: ```bash cd gem5 @@ -335,16 +336,18 @@ scons build/X86/gem5.fast -j$(nproc) --- + + ## 01-local-inst-tracker -After it is built, we can test our `LocalInstTracker` with the [materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py](../../materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py) +After it is built, we can test our `LocalInstTracker` with [materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py](../../materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py). ```bash cd /workspaces/2024/materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker /workspaces/2024/gem5/build/X86/gem5.fast -re --outdir=simple-sim-m5out simple-sim.py ``` -This SE script runs a simple openmp workload that sums up an array of numbers. The source code of this workload can be found in [materials/03-Developing-gem5-models/09-extending-gem5-models/simple-omp-workload/simple_workload.c](../../materials/03-Developing-gem5-models/09-extending-gem5-models/simple-omp-workload/simple_workload.c). +This SE script runs a simple OpenMP workload that sums up an array of numbers. The source code of this workload can be found in [materials/03-Developing-gem5-models/09-extending-gem5-models/simple-omp-workload/simple_workload.c](../../materials/03-Developing-gem5-models/09-extending-gem5-models/simple-omp-workload/simple_workload.c). ```c m5_work_begin(0, 0); @@ -361,7 +364,7 @@ m5_work_end(0, 0); ## 01-local-inst-tracker -For our SE script, we first attach a LocalInstTracker to each core object with a threshold of 100,000 instructions. We will not start listening to the core's committed instructions from the start of the simulation. +For our SE script, we first attach a `LocalInstTracker` to each core object with a threshold of 100,000 instructions. 
We will not start listening to the core's committed instructions from the start of the simulation. ```python from m5.objects import LocalInstTracker @@ -378,7 +381,7 @@ for core in processor.get_cores(): ## 01-local-inst-tracker -We will start listening when the simulation raises an workbegin exit event, so we need a workbegin handler to do that +We will start listening when the simulation raises an workbegin exit event, so we need a workbegin handler to do that. ```python def workbegin_handler(): @@ -400,7 +403,7 @@ def workend_handler(): ## 01-local-inst-tracker -We know that after reaching the threshold, our LocalInstTracker will raise an `ExitEvent.MAX_INSTS` exit event, so we need a handler for it too +We know that after reaching the threshold, our `LocalInstTracker` will raise an `ExitEvent.MAX_INSTS` exit event, so we need a handler for it too. ```python def max_inst_handler(): @@ -422,7 +425,7 @@ def max_inst_handler(): ## 01-local-inst-tracker -After setting these handlers with `simulator` +After setting these handlers with `simulator`: ```python simulator = Simulator( @@ -443,7 +446,7 @@ We should expect 8 `MAX_INSTS` events after the `WORKBEGIN` event. ## 01-local-inst-tracker -We should expect to see below log in `simout.txt` +We should expect to see the below log in `simout.txt`: ```bash Global frequency set at 1000000000000 ticks per second @@ -483,9 +486,10 @@ Simulation Done ## 01-local-inst-tracker -Congratulations! We now have our LocalInstTracker! -However, this local instruction exit event can be done with the [scheduleInstStop](https://github.com/studyztp/gem5/blob/studyztp/probe-user-inst/src/cpu/BaseCPU.py#L72) function in `BaseCPU`. Our goal is to have an instruction exit event that tracks the global committed instructions, which does not have an interface to do so easily in gem5 yet. 
-Since each ProbeListener can only attach to one SimObject, we can modify our LocalInstTracker to notify a global object to keep tracking all committed instructions in all ProbeListeners. +Congratulations! We now have our `LocalInstTracker`! +However, this local instruction exit event can be done with the [scheduleInstStop](https://github.com/studyztp/gem5/blob/studyztp/probe-user-inst/src/cpu/BaseCPU.py#L72) function in `BaseCPU`. Our goal is to have an instruction exit event that tracks the global committed instructions, which we can't do easily as the interface does not exist in gem5 yet. + +Since each ProbeListener can only attach to one SimObject, we can modify our `LocalInstTracker` to notify a global object to keep tracking all committed instructions in all ProbeListeners. ![](09-extending-gem5-models-imgs/global-listener.drawio.svg) @@ -497,7 +501,7 @@ Since each ProbeListener can only attach to one SimObject, we can modify our Loc All materials about this section can be found under [`materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker`](/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker). We can create a new SimObject to help us to keep track of all ProbeListeners. -Let's start to modify the `inst_tracker.hh` by adding a new SimObject class called `GlobalInstTracker`. +Let's start to modify `inst_tracker.hh` by adding a new SimObject class called `GlobalInstTracker`. ```cpp #include "params/GlobalInstTracker.hh" @@ -509,6 +513,7 @@ class GlobalInstTracker : public SimObject ``` --- + ## 02-global-inst-tracker @@ -537,7 +542,7 @@ public: ## 02-global-inst-tracker -So our `LocalInstTracker` now should only be like the following. Note that it has an pointer to a `GlobalInstTracker`. This is how we can notify the `GlobalInstTracker` from the `LocalInstTracker`. +So our `LocalInstTracker` now should only have the following. Note that it has a pointer to a `GlobalInstTracker`. 
This is how we can notify the `GlobalInstTracker` from the `LocalInstTracker`. ```cpp class LocalInstTracker : public ProbeListenerObject @@ -563,19 +568,19 @@ class LocalInstTracker : public ProbeListenerObject --- - + ## 02-global-inst-tracker Now, we need to decide how the `GlobalInstTracker` handles the notification from the `LocalInstTracker`. -We want it to count the number of global committed instruction, check if it reaches the threshold, and raise an exit event if it does. -Therefore, in `inst_tracker.hh`, let's add a `checkPc` function to the `GlobalInstTracker` too. +We want it to count the number of global committed instructions, check if it reaches the threshold, and raise an exit event if it does. +Therefore, in `inst_tracker.hh`, let's add a `checkPc` function to the `GlobalInstTracker` as well. ```cpp void checkPc(const uint64_t& inst); ``` -In `inst_tracker.cc`, let's define it as +In `inst_tracker.cc`, let's define it as: ```cpp void @@ -592,7 +597,7 @@ GlobalInstTracker::checkPc(const uint64_t& inst) ## 02-global-inst-tracker -Now, we need to modify the original `checkPc` function for the `LocalInstTracker` to notify the `GlobalInstTracker` +Now, we need to modify the original `checkPc` function for the `LocalInstTracker` to notify the `GlobalInstTracker`. ```cpp void @@ -602,7 +607,7 @@ LocalInstTracker::checkPc(const uint64_t& inst) } ``` -Don't forget to change the constructor of the `LocalInstTracker` +Don't forget to change the constructor of the `LocalInstTracker`. ```cpp LocalInstTracker::LocalInstTracker(const LocalInstTrackerParams &p) @@ -616,7 +621,7 @@ LocalInstTracker::LocalInstTracker(const LocalInstTrackerParams &p) ## 02-global-inst-tracker -We are almost done with C++ part. Let's don't forget about the `GlobalInstTracker`'s constructor in the `inst_tracker.cc` +We are almost done with the C++ part. Let's don't forget about the `GlobalInstTracker`'s constructor in the `inst_tracker.cc`. 
```cpp GlobalInstTracker::GlobalInstTracker(const GlobalInstTrackerParams &p) @@ -626,7 +631,7 @@ GlobalInstTracker::GlobalInstTracker(const GlobalInstTrackerParams &p) {} ``` -After this, we need to modify the `InstTracker.py` for the new `GlobalInstTracker` and the modified `LocalInstTracker` +After this, we need to modify the `InstTracker.py` for the new `GlobalInstTracker` and the modified `LocalInstTracker`. --- @@ -676,7 +681,7 @@ class LocalInstTracker(ProbeListenerObject): ## 02-global-inst-tracker -Finally, the [gem5/src/cpu/probes/SConscript](../../gem5/src/cpu/probes/SConscript) +Finally, [gem5/src/cpu/probes/SConscript](../../gem5/src/cpu/probes/SConscript). ```python SimObject( @@ -699,7 +704,7 @@ scons build/X86/gem5.fast -j$(nproc) There is a simple SE script in [materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py](../../materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py). -We can test our `GlobalInstTracker` with it using the command +We can test our `GlobalInstTracker` with it using this command: ```bash cd /workspaces/2024/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker @@ -712,7 +717,7 @@ This script runs the same workload we did in 01-local-inst-tracker, but with the ## 02-global-inst-tracker -It creates a `GlobalInstTracker` and when each `LocalInstTracker` attaches to the core, it passes itself as a reference to the `global_inst_tracker` parameter +It creates a `GlobalInstTracker` and when each `LocalInstTracker` attaches to the core, it passes itself as a reference to the `global_inst_tracker` parameter. ```python from m5.objects import LocalInstTracker, GlobalInstTracker @@ -737,8 +742,8 @@ for core in processor.get_cores(): We start to listen when workbegin is raised, then exit the simulation after 100,000 instructions are committed accumulatively by all cores. 
Also, we reset the stats at workbegin, so we can verify if the `GlobalInstTracker` actually did its job. -If the simulation finished, we can count the stats. -There is a helper python file [materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/count_commited_inst.py](../../materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/count_commited_inst.py) for us to easily calculate the total committed instructions by all 8 cores. +If the simulation is finished, we can count the stats. +There is a helper Python file [materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/count_commited_inst.py](../../materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/count_commited_inst.py) to let us easily calculate the total committed instructions by all 8 cores. Let's run it with ```python @@ -754,5 +759,5 @@ Total committed instructions: 100000 ## Summary -The ProbePoint is a useful tool to profile or add helper features for our simulation without adding too much to the components' codebase. +ProbePoint is a useful tool to profile or add helper features to our simulations without adding too much to the components' codebase. diff --git a/slides/05-Other-simulators/01-sst.md b/slides/05-Other-simulators/01-sst.md index 4162f83..872949a 100644 --- a/slides/05-Other-simulators/01-sst.md +++ b/slides/05-Other-simulators/01-sst.md @@ -15,9 +15,9 @@ title: gem5/SST Integration We're not going to do this today. -What we'll do instead is use a docker container with sst installed. +What we'll do instead is use a Docker container with SST installed. -Run the following to go into the docker container. +Run the following to go into the Docker container. Note: You shouldn't use a container interactively like this, but I'm lazy. ```sh @@ -34,7 +34,7 @@ To use gem5 as a "component" in SST, you need to build it as a library. This is yet another unique build target... 
Note: if you're building on a Mac, it's not ".so" it's ".dynlib" -Compiling gem5 as a library +Compiling gem5 as a library: ```bash cd gem5/ @@ -46,20 +46,20 @@ scons build/for_sst/libgem5_opt.so -j8 --without-tcmalloc --duplicate-sources ## Building the gem5 component in gem5 -Compiling gem5 component +Compiling gem5 component: ```bash cd ext/sst cp Makefile.linux Makefile ``` -Change the line with `ARCH=RISCV` to `ARCH=for_sst` +Change the line with `ARCH=RISCV` to `ARCH=for_sst`. ```sh make -j8 ``` -Running the simulation, +Running the simulation: ```bash sst --add-lib-path=. sst/example.py @@ -97,9 +97,9 @@ sst --add-lib-path=. sst/example.py How to set up gem5 in another simulator? * Step 1: Setting up the gem5 Python environment. - * Need to manually import the m5 module + * Need to manually import the m5 module. * Step 2: Reading the gem5 Python system configuration file. - * This includes setting up the communication data path for gem5 and the other simulator + * This includes setting up the communication data path for gem5 and the other simulator. * Notes: @@ -149,10 +149,10 @@ http://sst-simulator.org/ * SST::Event (similar to gem5::Event) * Sent via SST::Link -* Parallelization, +* Parallelization: * SST partitions components to multiple partitions. * Communication between partitions are done via MPI. - * The partitioning process can be done automatically or manually + * The partitioning process can be done automatically or manually. --- @@ -173,7 +173,7 @@ http://sst-simulator.org/ * gem5 provides: * OutgoingRequestBridge: a Request port sending requests to external components. * SSTResponderInterface: an interface for a Response port for an external component. -* gem5 Component is an SST::Component, which has multiple SSTResponder's implementing the SSTReponderInterface. +* gem5 Component is a SST::Component, which has multiple SSTResponder's implementing the SSTReponderInterface. * The packet translation happens within the gem5 Component. 
--- @@ -186,7 +186,7 @@ http://sst-simulator.org/ ## gem5/SST Integration -* Example (arm and RISC-V): +* Example (Arm and RISC-V): * gem5 as an SST component: gem5/ext/sst/ * SST system configuration: gem5/ext/sst/sst/example.py * gem5 system configuration: gem5/configs/example/sst/riscv_fs.py @@ -218,7 +218,7 @@ http://sst-simulator.org/ * Each gem5 component is in a different partition. * Communication between gem5 instances can be done via gem5 PIO devices. * Why? - * There are more parallelism at the node granularity. + * There is more parallelism at the node granularity. --- diff --git a/slides/05-Other-simulators/02-dram.md b/slides/05-Other-simulators/02-dram.md index 2348be9..365700d 100644 --- a/slides/05-Other-simulators/02-dram.md +++ b/slides/05-Other-simulators/02-dram.md @@ -19,18 +19,18 @@ title: Extending gem5 with DRAMSim and DRAMSys ## Why use an external simulator? > Note: I don't advise using external DRAM simulators. -> gem5's DRAM model is accurate enough for most research +> gem5's DRAM model is accurate enough for most research. The main reasons to use an external DRAM simulator are: -- For comparisons between gem5's DRAM models and other simulators (e.g., when developing a new DRAM model for gem5) -- When you have already modified the other simulator and need to drive it with realistic traffic +- For comparisons between gem5's DRAM models and other simulators (e.g., when developing a new DRAM model for gem5). +- When you have already modified the other simulator and need to drive it with realistic traffic. --- ## Getting DRAMSys -See [`gem5/ext/dramsys/README`](../../gem5/ext/dramsys/README) for deatils. +See [`gem5/ext/dramsys/README`](../../gem5/ext/dramsys/README) for details. Run @@ -53,21 +53,21 @@ scons build/NULL/gem5.opt -j$(nproc) ## Using DRAMSys -See for documentation on DRAMSys +See for documentation on DRAMSys. To configure gem5 to use DRAMSys, you can use the standard library. 
DRAMSys can be used as a `MemorySystem` just like the `SingleChannel` or `MultiChannel` memories. Open [`materials/05-Other-simulators/02-dram/dramsys-example.py`](../../materials/05-Other-simulators/02-dram/dramsys-example.py). -Add the following lines to create a memory system with DDR4 from DRAMSys +Add the following lines to create a memory system with DDR4 from DRAMSys. ```python memory = DRAMSysMem( configuration="/workspaces/2024/gem5/ext/dramsys/DRAMSys/configs/ddr4-example.json", recordable=True, resource_directory="/workspaces/2024/gem5/ext/dramsys/DRAMSys/configs", - size="4GB", + size="4GiB", ) ``` @@ -81,11 +81,11 @@ Options for DRAMSys: - Must be absolute or relative to your run path. - `resource_directory`: Pointer to the configs directory. - Must be absolute or relative to your run path. -- `recordable`: Whether DRAMSys should record a trace file +- `recordable`: Whether DRAMSys should record a trace file. ### Note on implementation -- DRAMSys uses TLM 2.0 +- DRAMSys uses TLM 2.0. - This is a good example of how to get gem5 to talk to a TLM object. --- @@ -96,7 +96,7 @@ Options for DRAMSys: ../../../gem5/build/NULL/gem5.opt dramsys-example.py ``` -```test +```text board.memory.dramsys.DRAMSys.controller0 Total Time: 250027920 ps board.memory.dramsys.DRAMSys.controller0 AVG BW: 87.97 Gb/s | 11.00 GB/s | 73.67 % board.memory.dramsys.DRAMSys.controller0 AVG BW\IDLE: 87.97 Gb/s | 11.00 GB/s | 73.67 % @@ -111,9 +111,9 @@ Outputs a file, `board.memory.dramsys.DRAMSys_ddr4-example_example_ch0.tdb` whic ## DRAMSim -Similar to DRAMSys in how to obtain and use +Similar to DRAMSys in how to obtain and use. -> Note: DRAMSim3 is not tested regularly +> Note: DRAMSim3 is not tested regularly. See [`gem5/ext/dramsim3/README`](../../gem5/ext/dramsim3/README) for details. 
diff --git a/slides/06-Contributing/03-gem5-at-home.md b/slides/06-Contributing/03-gem5-at-home.md index 6a49e9e..fb37f3f 100644 --- a/slides/06-Contributing/03-gem5-at-home.md +++ b/slides/06-Contributing/03-gem5-at-home.md @@ -36,7 +36,7 @@ These links and more information are also available at [https://www.gem5.org/ask - gem5 performance qualities - Single threaded - - Consumes lots of RAM (if you want to model 32 GB of memory, it needs 32 GB of memory to model it) + - Consumes lots of RAM (if you want to model 32 GiB of memory, it needs 32 GiB of memory to model it) - Can take a lot of time - Because of this its best to run multiple experiments in parallel - Recommended hardware: From 4ad2425d6d5a5ff4821b65ddf4eb6f12b941227a Mon Sep 17 00:00:00 2001 From: Erin Le Date: Fri, 23 Aug 2024 00:33:17 +0000 Subject: [PATCH 6/7] Messed up with stashing/rebasing; committing changes that clarify power of 2 vs 10 and possibly some typo fixes before cherry-picking the commit that definitely has the typo fixes --- slides/03-Developing-gem5-models/04-ports.md | 2 +- slides/06-Contributing/01-contributing.md | 77 ++++++++++---------- slides/06-Contributing/02-testing.md | 53 ++++++++------ slides/06-Contributing/03-gem5-at-home.md | 30 ++++---- 4 files changed, 87 insertions(+), 75 deletions(-) diff --git a/slides/03-Developing-gem5-models/04-ports.md b/slides/03-Developing-gem5-models/04-ports.md index 230b36b..8d9bef6 100644 --- a/slides/03-Developing-gem5-models/04-ports.md +++ b/slides/03-Developing-gem5-models/04-ports.md @@ -934,7 +934,7 @@ If you remember from [Event Driven Simulation](./03-event-driven-sim.md), we als Now that we have declared `nextReqSendEvent`, we can schedule `nextReqSendEvent` in `InspectorGadget::recvTimingReq`. We will see in a few slides why it is helpful to have a function that decides if and when `nextReqSendEvent` should be scheduled. -What I do when I write `SimObjects` is that, for every `event`, I create a function to schedule that event. 
I name these functions with `schedule` prefixing the name of the event. Let's go ahead and declare `scheduleNextReqSendEvent` under the `private` scope in `InspectorGadget`.
+What I do when I write `SimObjects` is that for every `event`, I create a function to schedule that event. I name these functions with `schedule` prefixing the name of the event. Let's go ahead and declare `scheduleNextReqSendEvent` under the `private` scope in `InspectorGadget`.

Open `inspector_gadget.hh` and add the following lines:

diff --git a/slides/06-Contributing/01-contributing.md b/slides/06-Contributing/01-contributing.md
index a1d53c9..db64c82 100644
--- a/slides/06-Contributing/01-contributing.md
+++ b/slides/06-Contributing/01-contributing.md
@@ -54,19 +54,19 @@ That's understandable. However, please keep the following in mind:

1. _Everyone_, even the most experienced gem5 devs, have had their changes rejected.
-There will always exist a pull-request on the gem5 GitHub so the changes are never "gone".
-The reasons for rejection are not personal, but are often concerns about how it will affect users or long term maintainability.
-If a change would be a lot of time to implement, try contacting the community to see if it'd be welcome before starting.
+The pull request will always exist on the gem5 GitHub so the changes are never "gone".
+ - The reasons for rejection are not personal, but are often concerns about how it will affect users or long term maintainability.
+ - If a change would take a lot of time to implement, try contacting the community to see if it'd be welcome before starting.
2. The gem5 devs are nice people and are not trying to be mean.
-We have to critique code contributed but we try our best to ensure it's constructive. Where possible we'll suggest how address our concerns.
-Again, nothing is personal.
+ - We have to critique code contributed but we try our best to ensure it's constructive. Where possible we'll suggest how to address our concerns.
+ - Again, nothing is personal. --- 3. Very very few changes of any magnitude are accepted without some back and forth requests. -Everyone who's worked on gem5 long enough has changes that needed 5 or 6 iterations before they were accepted. It shouldn't be feared or seen as a bad thing. + - Everyone who's worked on gem5 long enough has changes that needed 5 or 6 iterations before they were accepted. It shouldn't be feared or seen as a bad thing. 4. No one fully understands the gem5 codebase. -There are parts of gem5 no one understands. It's ok to feel you don't understand the codebase completely, but it's not a reason to not contribute to the parts you do. + - There are parts of gem5 no one understands. It's ok to feel you don't understand the codebase completely, but it's not a reason to not contribute to the parts you do. --- @@ -85,7 +85,7 @@ If you just want to try contributing, but don't have a specific idea, try lookin A large part of avoiding or fixing changes to get around this regards testing. 2. _Something for which we can't validate correctness, now or in the future_: If you've developed something that's hard to test, or that we can't easily validate is correct, it's unlikely to be accepted. We can't just read the code and always understand that it's functional. **To avoid this provide tests with your changes** (more of this later). 3. _Features that are overly niche and lack general applicability to the typical gem5 user_: If it's something you and only one or two other people will use, it's unlikely to be accepted. In these cases it's probably better to maintain a fork of gem5 with your changes. -4. _It doesn't conform our standards_: (typically style guidelines) the code is fine, it works, but you need to make some changes to make it conform to our style guidelines. This is a common reason for changes to be rejected, but it's also one of the easiest to fix. +4. 
_It doesn't conform to our standards_ (typically style guidelines): The code is fine, it works, but you need to make some changes to make it conform to our style guidelines. This is a common reason for changes to be rejected, but it's also one of the easiest to fix.

---

@@ -117,12 +117,11 @@ git clone https://github.com/your-username/gem5.git

## Your forked repo: Some tips and good housekeeping

-- In gem5, don't makes changes to your repo's `stable` and `develop` branch.
-It's best to keep these as branches as reference to the main gem5 repo.
-Instead create new branch from these:
+- In gem5, don't make changes to your repo's `stable` and `develop` branch. It's best to keep these as branches as reference to the main gem5 repo.
+- Instead create new branches using these:

```shell
-git switch develop # Gets the branch locally the first time it is run
+git switch develop # Gets the develop branch locally the first time it is run
git branch -c develop new-branch.
```

@@ -148,26 +147,26 @@ In gem5, developers' changes are only merged into the `develop` branch. The `dev

There are multiple ways to do this.

-1. Via the web interface: Go to your forked repo on GitHub go to the `stable` or `develop` branch and click the "Fetch upstream" button "sync fork" (note: you have to do this for each branch). Then pull the changes into your local repo with `git pull origin stable` and/or `git pull origin develop`.
+1. Via the web interface: Go to your forked repo on GitHub. Go to the `stable` or `develop` branch and click the "Sync fork" button (note: you have to do this for each branch). Then pull the changes into your local repo with `git pull origin stable` and/or `git pull origin develop`.
2. Use the GitHub CLI: `gh repo sync {username}/gem5 -b develop && gh repo sync {username}/gem5 -b stable` will sync your forked repo (on GitHub) with the main gem5 repo. You can then pull the changes into your local repo with `git switch stable && git pull && git switch develop && git pull`.
> **Note**: We will not cover this in this tutorial. See (https://cli.github.com/) -3. Through the git tool in your local repo by fetching the (the main gem5 repo) and merge the upstream into your local repo. +3. Through the Git tool in your local repo by fetching the main gem5 repo and merging the upstream into your local repo. --- + ## Syncing your local repo ```shell git remote -v ``` -Typically you'll have an `origin` which is your GitHub repo you pulled this from. -GitHub will also add a remote called `upstream`, for forked repos, which is the main gem5 repo. -We'll keep with this naming convention, but please be aware these "remotes" can be named anything. +Typically you'll have an `origin` which is your GitHub repo which you pulled this from. GitHub will also add a remote called `upstream`, for forked repos, which is the main gem5 repo. +- We'll keep with this naming convention, but please be aware these "remotes" can be named anything. -If you need to add the upstream you can do with +If you need to add the upstream you can do with: ```shell git remote add upstream https://github.com/gem5/gem5.git @@ -197,7 +196,7 @@ git push origin develop **Please note:** To push to your GitHub you'll need to authenticate yourself on the system you're using. There's a few ways to do this and may be dependent on how you've set up your GitHub account. We won't cover this here, but you can find out more here: . Pushing in this tutorial isn't important. You can figure this out in your own time. -> [More on github's help page](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork) +> More on [GitHub's help page](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork). --- @@ -234,8 +233,7 @@ git add hello.txt git commit -m "misc: Adding goodbye to hello.txt" ``` -Please, for now, include `misc:` in your commit messages. -This will be explained later. 
+Please, for now, include `misc:` in your commit messages. This will be explained later. --- @@ -248,7 +246,7 @@ The first time you'll likely need to set the upstream branch with: git push --set-upstream origin my-change ``` -This is done to inform your git repo that this local branch is to be pushed to the "origin" remote (your GitHub repo) and that it should track the remote branch. This is what `--set-upstream` does. +This is done to inform your Git repo that this local branch is to be pushed to the "origin" remote (your GitHub repo) and that it should track the remote branch. This is what `--set-upstream` does. **Note**: It's unfortunate "upstream" is used in two different contexts here. In this case upstream is your GitHub based repo: it is what is immediately "upstream" to this your local repo. However the upstream in `git remote -v` is the main gem5 repo. In this case it's what's "upstream" from origin. There's a chain of upstreams: your local repo contributes upstream to your GitHub repo which, via a pull request, contributes upstream to the main gem5 repo. @@ -289,9 +287,11 @@ If the CI tests fail or a reviewer requests changes before approval you'll need --- + + ## Updating the PR -In Github, all you need to do is update the branch you've submitted the PR from on Github. +In GitHub, all you need to do is update the branch you've submitted the PR from on GitHub. (i.e., the branch from your forked repo). ### Add a commit @@ -315,6 +315,7 @@ git push origin my-change ## Rebasing when gem5 is updated You can _rebase_ your branch to make changes to existing commits in it. + This is useful if you need to change a commit message or change the order of commits, change contents of commits, merge commits, or delete commits. It is very powerful but can be dangerous if you're not sure what you're doing. 
@@ -344,7 +345,7 @@ pick a1b2c3d misc: Adding bla to hello.txt pick e4f5g6h misc: Adding goodbye to hello.txt ``` -delete commits: +Delete commits: ```shell pick i7j8k9l misc: Adding hello.txt @@ -359,6 +360,7 @@ reword a1b2c3d misc: Adding bla to hello.txt ``` --- + ## Rebasing @@ -369,7 +371,7 @@ edit i7j8k9l misc: Adding hello.txt reword a1b2c3d misc: Adding bla to hello.txt ``` -or squash commits: +Or squash commits: ```shell pick i7j8k9l misc: Adding hello.txt @@ -377,9 +379,8 @@ fixup a1b2c3d misc: Adding bla to hello.txt ``` **Warning**: Rebase errors can arise (similar to merge conflicts) and can be difficult to fix. -If you're not sure what you're doing, it's best to avoid rebasing and just add commits. -Generally though, using `fixup` and `squash` are safe, as is `reword`. -Difficulties arise when moving, deleting, or editing commits. +- If you're not sure what you're doing, it's best to avoid rebasing and just add commits. +- Generally though, using `fixup` and `squash` are safe, as is `reword`. Difficulties arise when moving, deleting, or editing commits. --- @@ -393,12 +394,13 @@ The following are basic requirements for a PR to be accepted: - Commit messages contain a Change-Id. --- + ## Using `pre-commit` Fortunately there's a tool which can help with _most_ of this: Python `pre-commit`. -`pre-commit` is a tool that runs a series of checks on your code before you commit it. -It checks for code style and formatting issues, and runs some other basic checks in your local repo, allowing you to catch problems before you submit a PR. +- `pre-commit` is a tool that runs a series of checks on your code before you commit it. + - It checks for code style, formatting issues, and runs some other basic checks in your local repo, allowing you to catch problems before you submit a PR. For the following `pre-commit` will detect and automatically and correct any problems: @@ -418,7 +420,7 @@ For now CPP formatting is a manual process. 
## Installing pre-commit

-`pre-commit` triggers a series of checks when `git commit` is run. It is a git hook which is executed before the commit is made.
+`pre-commit` triggers a series of checks when `git commit` is run. It is a Git hook which is executed before the commit is made.

To install `pre-commit` execute the following:

@@ -461,7 +463,7 @@ Issue: https://github.com/gem5/gem5/issues/123

## Formatting a commit message

-A description may spawn multiple paragraphs if desired. It can be useful to add metadata about the change at the end. In particular, a link to the Issue it addresses is helpful.
+A description may span multiple paragraphs if desired. It can be useful to add metadata about the change at the end. In particular, a link to the issue it addresses is helpful.

**Important**:

@@ -530,10 +532,10 @@ class ExampleClass

## Spacing

-- One space between keywords (if, for, while, etc.) and opening parentheses
-- One space around binary operators (+, -, <, >, etc.) including assignment operators (=, +=, etc.)
-- No space around ‘=’ when used in parameter/argument lists, either to bind default parameter values (in Python or C++) or to bind keyword arguments (in Python)
-- No space between function names and opening parentheses for arguments
+- One space between keywords (if, for, while, etc.) and opening parentheses.
+- One space around binary operators (+, -, <, >, etc.) including assignment operators (=, +=, etc.).
+- No space around ‘=’ when used in parameter/argument lists, either to bind default parameter values (in Python or C++) or to bind keyword arguments (in Python).
+- No space between function names and opening parentheses for arguments.
- No space immediately inside parentheses, except for very complex expressions. Complex expressions are preferentially broken into multiple simpler expressions using temporary variables.

---

@@ -549,6 +551,7 @@ class ExampleClass

This includes function arguments (e.g., `myFunction(int arg_one, int arg_two)`).
--- + ## Another code example @@ -573,7 +576,7 @@ class FooBarCPU --- - + ## Include blocks diff --git a/slides/06-Contributing/02-testing.md b/slides/06-Contributing/02-testing.md index d032a5d..97cf2fc 100644 --- a/slides/06-Contributing/02-testing.md +++ b/slides/06-Contributing/02-testing.md @@ -23,7 +23,7 @@ In practice, if we asked for tests this way we would receive no contributions. ### Our outlook on testing - If a feature isn't tested, we don't "support it" (e.g., DRAMSim3). -- If the "gem5 developers" want to add a supported feature, we are usually the ones to add a test. +- If the "gem5 developers" want to add a supported feature, we are usually the ones to add tests. - Adding tests takes time away from fixing bugs, adding new features, etc. --- @@ -31,11 +31,11 @@ In practice, if we asked for tests this way we would receive no contributions. ## gem5 Test Categories We run tests on the gem5 codebases regularly to ensure that changes do not break the code. -These tests exist in four main categories; +These tests exist in four main categories: 1. **CPP Unit tests**: These are tests that run C++ code. In gem5, we use the Google Test framework. 2. **Python Unit tests**: These are tests that run Python code. In gem5, we use the Python unittest framework. -3. **TestLib Tests**: These are tests that run gem5 simulations, verify exit codes, and compare output to expected output ("testlib" is the name of the framework used to do this). +3. **TestLib Tests**: These are tests that run gem5 simulations, verify exit codes, and compare output to expected output ("TestLib" is the name of the framework used to do this). 4. **Compilation Tests**: Tests which compile gem5 under different configurations with different compilers/environments. > There are some tests we run which don't fit into these categories, but these are the main ones. @@ -44,25 +44,26 @@ These tests exist in four main categories; ## gem5 Test Schedule -1. 
**CI Tests**: These tests are run on every pull request to gem5, and every update to any pull request. The CI tests consist of the CPP and Python unit tests and a subset of the TestLib tests and Compilation tests. These are designed to run "quickly" (by gem5 standards), in under 4 hours. +1. **CI Tests**: These tests are run on every pull request to gem5, and every update to any pull request. + - The CI tests consist of the CPP and Python unit tests and a subset of the TestLib tests and Compilation tests. These are designed to run "quickly" (by gem5 standards), in under 4 hours. 2. **Daily Tests**: These tests are run every day on the gem5 codebase. -These tests consist of the larger Testlib tests. They typically take 12 hours or more to complete. + - These tests consist of the larger TestLib tests. They typically take 12 hours or more to complete. 3. **Weekly Tests**: These tests are run weekly on the gem5 codebase. -These tests consist of the largest Testlib test suite and the Compilation tests. These tests typically take 1 to 2 days to complete. + - These tests consist of the largest TestLib test suite and the Compilation tests. These tests typically take 1 to 2 days to complete. 4. **Compiler Tests**: These tests are run every week. -These run a cross product of gem5 compilation targets and compilers the project currently supports. These tests usually take around 12 hours to complete. + - These run a cross product of gem5 compilation targets and compilers the project currently supports. These tests usually take around 12 hours to complete. --- ## GitHub Actions -The complete GitHub Actions workflow for these tests can be found in the [.github/workflows/](https://github.com/gem5/gem5/blob/v24.0/.github/workflows) in the gem5 repository. +The complete GitHub Actions workflow for these tests can be found in [.github/workflows/](https://github.com/gem5/gem5/blob/v24.0/.github/workflows) in the gem5 repository. 
-We not go over these in this session but you can look over these yaml files and see how GitHub Actions is triggered to run these the gem5 tests.
+We will not go over these in this session but you can look over these yaml files and see how GitHub Actions is triggered to run the gem5 tests.

These tests run on "self-hosted" runners. There is a machine at Wisconsin (loupe) which runs these tests.

-> Bonus points if you know what a "loupe" is
+> Bonus points if you know what a "loupe" is.

---

@@ -102,9 +103,10 @@ scons build/ALL/base/bitfield.test.opt

More information on Python's unittest framework can be found at

The tests are run with `gem5 tests/run_pyunit.py` command.
-In our case, any file in the "tests/pyunit" directory with the prefix "pyunit_" is considered a test by the test runner.
-Individual subdirectories in "tests/pyunit" can be specified and run separately by passing those subdirectories as arguments to "tests/run_pytests.py". E.g.: `gem5 tests/run_pyunit.py --directory tests/pyunit/util`.
+In our case, any file in the [tests/pyunit](https://github.com/gem5/gem5/tree/stable/tests/pyunit) directory with the prefix "pyunit_" is considered a test by the test runner.
+
+Individual subdirectories in [tests/pyunit](https://github.com/gem5/gem5/tree/stable/tests/pyunit) can be specified and run separately by passing those subdirectories as arguments to "tests/run_pytests.py". E.g.: `gem5 tests/run_pyunit.py --directory tests/pyunit/util`.

---

@@ -130,7 +132,7 @@ TestLib tests are the most important tests in gem5.

These tests run gem5 simulations and verify the output of the simulation. The tests are written in Python and use the "testlib" framework to run the simulations and verify the output.

-We do not have very good coverage of unittests, so most of our testing is in integration tests via the TestLib.
+We do not have very good coverage of unit tests, so most of our testing is in integration tests via the TestLib.
--- @@ -145,7 +147,8 @@ It's useful to just focus on a subdirectory of tests when running tests: ``` The above will only run the "quick" tests in the "tests/gem5/memory" directory. -The "quick" tests are the testlib tests run in the CI pipeline. To run the tests in the "daily" or "weekly" test suites, you can use the `--length` to specify `long` or `very-long` (`quick` is the `length` default). + +The "quick" tests are the TestLib tests that are run in the CI pipeline. To run the tests in the "daily" or "weekly" test suites, you can use the `--length` to specify `long` or `very-long` (`quick` is the `length` default). --- @@ -153,7 +156,7 @@ The "quick" tests are the testlib tests run in the CI pipeline. To run the tests The `./main.py list` command can be used to list all the tests in a directory, which we'll demonstrate here: -```shell +```bash # List all the long tests in tests/gem5/memory: Those run in the Daily Tests. ./main.py list --length long gem5/memory @@ -165,10 +168,11 @@ The `./main.py list` command can be used to list all the tests in a directory, w ## How TestLib Tests are declared -Let's look at ["tests/gem5/m5_util"](https://github.com/gem5/gem5/blob/v24.0/tests/gem5/m5_util) to see how a test is declared. +Let's look at [tests/gem5/m5_util](https://github.com/gem5/gem5/blob/v24.0/tests/gem5/m5_util) to see how a test is declared. In this directory there is "test_exit.py". -Any file with the prefix "test_" is considered a test by the testlib framework and will be automatically run when the tests are executed. + +Any file with the prefix "test_" is considered a test by the TestLib framework and will be automatically run when the tests are executed. "configs" is a directory of configuration scripts that are used to run the tests defined in "test_exit.py". @@ -178,7 +182,7 @@ Now, let's look into "test_exit.py" and see how the tests are declared. 
## Declaring how to test -Import testlib library (found in `gem5/ext/testlib`) +Import TestLib library (found in `gem5/ext/testlib`). ```python from testlib import * @@ -186,7 +190,7 @@ from testlib import * Many of the tests use regex matching. For example, we check to see if the `m5_exit` instruction is encountered below and then create a *verifier*. -**`verifier`** is used by the testlib to check the output. +**`verifier`** is used by TestLib to check the output. ```py m5_exit_regex = re.compile( @@ -261,27 +265,28 @@ TestUID:tests/gem5/m5_util/test_exit.py:m5_exit_test-ALL-x86_64-opt:m5_exit_test ## Running the tests -Then run with +Then run with: ```shell ./main.py run gem5/m5_util ``` -**Note**: This will try and build "ALL/gem5.opt" each time you run the tests. -This can be time consuming. +**Note**: This will try and build "ALL/gem5.opt" each time you run the tests. This can be time consuming. + You can pre-build the ALL/gem5.opt build with `scons build/ALL/gem5.opt -j$(nproc)` then, when running `./main.py run gem5/m5_util` add the `--skip-build` flag to skip the build step: `./main.py run --skip-build gem5/m5_util`. If you want/need to build at this step, pass `-j$(nproc)` to the `./main.py run` command. --- + ## Exercise: Creating a TestLib Test Go to [materials/06-Contributing/02-testing/01-testlib-example](../../materials/06-Contributing/). -Move "01-testlib-example" to "tests/gem5/" in the gem5 repository. +Move `01-testlib-example` to [tests/gem5/](../../gem5/tests/gem5) in the gem5 repository. -Provided in "test_example.py" is the `gem5_verify_config` function which is used to define testlib tests. +Provided in `test_example.py` is the `gem5_verify_config` function which is used to define TestLib tests. 
```python gem5_verify_config( diff --git a/slides/06-Contributing/03-gem5-at-home.md b/slides/06-Contributing/03-gem5-at-home.md index fb37f3f..c2d79d1 100644 --- a/slides/06-Contributing/03-gem5-at-home.md +++ b/slides/06-Contributing/03-gem5-at-home.md @@ -12,6 +12,8 @@ author: William Shaddix --- + + ## Getting help gem5 has lots of resources to get help: @@ -26,7 +28,7 @@ gem5 has lots of resources to get help: - [gem5-announce@gem5.org : For general gem5 announcements](https://harmonylists.io/list/gem5-announce.gem5.org) 3. [Youtube videos](https://www.youtube.com/@gem5) -These links and more information are also available at [https://www.gem5.org/ask-a-question/](https://www.gem5.org/ask-a-question/) +These links and more information are also available at [https://www.gem5.org/ask-a-question/](https://www.gem5.org/ask-a-question/). > We do our best to get to questions, but they often go unanswered. This isn't because it's not a good question, but because we don't have enough volunteers. @@ -34,11 +36,11 @@ These links and more information are also available at [https://www.gem5.org/ask ## Running gem5 at home -- gem5 performance qualities +- gem5 performance qualities: - Single threaded - Consumes lots of RAM (if you want to model 32 GiB of memory, it needs 32 GiB of memory to model it) - Can take a lot of time -- Because of this its best to run multiple experiments in parallel +- Because of this its best to run multiple experiments in parallel. - Recommended hardware: - High single thread performance - Doesn't need many cores @@ -62,24 +64,26 @@ See our Dockerfiles for the most up-to-date version information: --- -## Using dockerfiles + + +## Using Dockerfiles -If you have trouble, we have docker images. +If you have trouble, we have Docker images. -Here's a generic docker command that should work. +Here's a generic Docker command that should work. 
```sh docker run --rm -v $(pwd):$(pwd) -w $(pwd) ghcr.io/gem5/ubuntu-24.04_all-dependencies:v24-0 ``` - Runs the image at `https://ghcr.io/gem5/ubuntu-24.04_all-dependencies:v24-0`. -- Automatically removes the docker image (`--rm`) -- Sets it up so that the current directory (`-v $(pwd):$(pwd)`) is available inside the docker container -- Sets the working directory to the current directory (`-w $(pwd)`) +- Automatically removes the Docker image (`--rm`). +- Sets it up so that the current directory (`-v $(pwd):$(pwd)`) is available inside the Docker container. +- Sets the working directory to the current directory (`-w $(pwd)`). - Runs a command. - Every command will now need to run with this to make sure the libraries are set up correctly. -> I cannot **strongly enough** emphasize that you should not run interactively in the docker container. Use it to just run one command at a time. +> I cannot **strongly enough** emphasize that you should not run interactively in the Docker container. Use it to just run one command at a time. --- @@ -95,9 +99,9 @@ The source will be at [`gem5/utils/dockerfiles/devcontainer`](https://github.com ## Recommended practices -- Unless planning on contributing to gem5 or you need to use recently developed work, use the ```stable``` branch. -- Create branches off of stable. -- Don't modify parameters of python files in `src/`. Instead create *extensions* of stdlib types or SimObjects. +- Unless you are planning on contributing to gem5 or need to use recently developed work, use the `stable` branch. +- Create branches off of `stable`. +- Don't modify parameters of Python files in `src/`. Instead create *extensions* of stdlib types or SimObjects. - Don't be afraid to read the code. The code is the best documentation. 
--- From 559cc64529394da57bddc66117145767e0b1e25a Mon Sep 17 00:00:00 2001 From: Erin Le Date: Fri, 23 Aug 2024 02:34:05 +0000 Subject: [PATCH 7/7] Clarified power of 10 vs power of 2 in bootcamp materials --- .../17-inheriting-from-a-simobject.py | 2 +- materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py | 2 +- materials/02-Using-gem5/01-stdlib/02-processor.py | 8 ++++---- .../02-Using-gem5/01-stdlib/completed/01-02-fs-mode.py | 8 ++++---- .../02-Using-gem5/01-stdlib/completed/01-components.py | 6 +++--- .../02-Using-gem5/01-stdlib/completed/02-processor.py | 8 ++++---- .../02-Using-gem5/02-gem5-resources/01-hello-example.py | 2 +- .../02-gem5-resources/02-suite-workload-example.py | 2 +- .../02-gem5-resources/03-run-local-resource-local-path.py | 4 ++-- .../02-gem5-resources/04-run-local-resource-json.py | 4 ++-- .../02-gem5-resources/complete/01-hello-example.py | 2 +- .../complete/02-suite-workload-example.py | 2 +- .../complete/03-run-local-resource-local-path.py | 4 ++-- .../complete/04-run-local-resource-json.py | 4 ++-- .../00-SE-hello-world/00-SE-hello-world.py | 6 +++--- .../03-running-in-gem5/03-run-x86-SE/03-run-x86-SE.py | 6 +++--- .../03-run-x86-SE/complete/03-run-x86-SE.py | 6 +++--- .../03-running-in-gem5/05-run-arm-SE/05-run-arm-SE.py | 6 +++--- .../05-run-arm-SE/complete/05-run-arm-SE.py | 6 +++--- .../step-1-linear-gen/simple-traffic-generators.py | 2 +- .../step-2-hybrid-gen/components/hybrid_generator.py | 2 +- .../06-traffic-gen/components/hybrid_generator.py | 2 +- .../05-cache-hierarchies/completed/three_level.py | 2 +- .../02-Using-gem5/05-cache-hierarchies/test-cache.py | 2 +- .../02-Using-gem5/05-cache-hierarchies/three_level.py | 2 +- materials/02-Using-gem5/06-memory/blank_memory.py | 2 +- materials/02-Using-gem5/06-memory/comm_monitor.py | 6 +++--- .../02-Using-gem5/06-memory/completed/blank_memory.py | 4 ++-- .../02-Using-gem5/06-memory/completed/comm_monitor.py | 8 ++++---- .../07-full-system/completed/x86-fs-gapbs-kvm-run.py | 4 ++-- 
.../07-full-system/completed/x86-fs-kvm-run.py | 4 ++-- .../02-Using-gem5/07-full-system/x86-fs-gapbs-kvm-run.py | 4 ++-- materials/02-Using-gem5/07-full-system/x86-fs-kvm-run.py | 4 ++-- .../08-accelerating-simulation/02-kvm-time/02-kvm-time.py | 6 +++--- .../02-kvm-time/complete/02-kvm-time.py | 6 +++--- .../03-restore-the-checkpoint.py | 6 +++--- .../03-checkpoint-and-restore/03-take-a-checkpoint.py | 2 +- .../complete/03-restore-the-checkpoint.py | 6 +++--- .../complete/03-take-a-checkpoint.py | 4 ++-- .../09-sampling/01-simpoint/complete/full-detailed-run.py | 8 ++++---- .../09-sampling/01-simpoint/complete/simpoint-analysis.py | 2 +- .../01-simpoint/complete/simpoint-checkpiont.py | 2 +- .../09-sampling/01-simpoint/complete/simpoint-run.py | 8 ++++---- .../09-sampling/01-simpoint/full-detailed-run.py | 8 ++++---- .../09-sampling/01-simpoint/simpoint-analysis.py | 2 +- .../09-sampling/01-simpoint/simpoint-checkpoint.py | 2 +- .../02-Using-gem5/09-sampling/01-simpoint/simpoint-run.py | 8 ++++---- .../09-sampling/02-elfies/complete/run-elfies.py | 2 +- .../02-Using-gem5/09-sampling/02-elfies/run-elfies.py | 2 +- materials/02-Using-gem5/09-sampling/03-SMARTS/SMARTS.py | 8 ++++---- .../09-sampling/03-SMARTS/complete/SMARTS.py | 8 ++++---- .../10-modeling-power/completed/three_level.py | 2 +- materials/02-Using-gem5/10-modeling-power/test-cache.py | 2 +- materials/02-Using-gem5/10-modeling-power/three_level.py | 2 +- .../01-multiprocessing-via-script/experiment.py | 2 +- .../01-multiprocessing-via-script/run-experiment.sh | 8 ++++---- .../multisim-experiment.py | 2 +- .../multisim-experiment.py | 6 +++--- .../inspector-gadget/components/hybrid_generator.py | 2 +- .../inspector-gadget/first-inspector-gadget-example.py | 2 +- .../inspector-gadget/components/hybrid_generator.py | 2 +- .../inspector-gadget/first-inspector-gadget-example.py | 2 +- .../inspector-gadget/components/hybrid_generator.py | 2 +- .../inspector-gadget/first-inspector-gadget-example.py | 2 +- 
.../07-chi-protocol/completed/run-test.py | 2 +- .../03-Developing-gem5-models/08-ruby-network/run-test.py | 2 +- .../01-local-inst-tracker/simple-sim.py | 6 +++--- .../02-global-inst-tracker/simple-sim.py | 6 +++--- materials/05-Other-simulators/02-dram/dramsim-example.py | 2 +- materials/05-Other-simulators/02-dram/dramsys-example.py | 4 ++-- .../01-testlib-example/completed/example_config.py | 2 +- .../02-testing/01-testlib-example/example_config.py | 2 +- 72 files changed, 144 insertions(+), 144 deletions(-) diff --git a/materials/01-Introduction/03-python-background/17-inheriting-from-a-simobject.py b/materials/01-Introduction/03-python-background/17-inheriting-from-a-simobject.py index c42eac5..423300e 100644 --- a/materials/01-Introduction/03-python-background/17-inheriting-from-a-simobject.py +++ b/materials/01-Introduction/03-python-background/17-inheriting-from-a-simobject.py @@ -48,7 +48,7 @@ class L1ICache(L1Cache): """Simple L1 instruction cache with default values""" # Set the size - size = "32kB" + size = "32KiB" def __init__(self): super().__init__() diff --git a/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py b/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py index 9cfd970..601c566 100644 --- a/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py +++ b/materials/02-Using-gem5/01-stdlib/01-02-fs-mode.py @@ -24,4 +24,4 @@ ) # Set up the system memory. 
-memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") diff --git a/materials/02-Using-gem5/01-stdlib/02-processor.py b/materials/02-Using-gem5/01-stdlib/02-processor.py index b63a8c1..97bb5fe 100644 --- a/materials/02-Using-gem5/01-stdlib/02-processor.py +++ b/materials/02-Using-gem5/01-stdlib/02-processor.py @@ -42,14 +42,14 @@ def __init__(self, width, rob_size, num_int_regs, num_fp_regs): pass -main_memory = SingleChannelDDR4_2400(size="2GB") +main_memory = SingleChannelDDR4_2400(size="2GiB") cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) diff --git a/materials/02-Using-gem5/01-stdlib/completed/01-02-fs-mode.py b/materials/02-Using-gem5/01-stdlib/completed/01-02-fs-mode.py index 5f1ffc0..380bf26 100644 --- a/materials/02-Using-gem5/01-stdlib/completed/01-02-fs-mode.py +++ b/materials/02-Using-gem5/01-stdlib/completed/01-02-fs-mode.py @@ -14,17 +14,17 @@ # Here we setup a MESI Two Level Cache Hierarchy. cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) # Setup the system memory. -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") # Here we setup the processor. This is a special switchable processor in which # a starting core type and a switch core type must be specified. Once a diff --git a/materials/02-Using-gem5/01-stdlib/completed/01-components.py b/materials/02-Using-gem5/01-stdlib/completed/01-components.py index f972e9c..0c360e1 100644 --- a/materials/02-Using-gem5/01-stdlib/completed/01-components.py +++ b/materials/02-Using-gem5/01-stdlib/completed/01-components.py @@ -21,11 +21,11 @@ # Here we setup a MESI Two Level Cache Hierarchy. 
cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) diff --git a/materials/02-Using-gem5/01-stdlib/completed/02-processor.py b/materials/02-Using-gem5/01-stdlib/completed/02-processor.py index f54b46f..29d6d25 100644 --- a/materials/02-Using-gem5/01-stdlib/completed/02-processor.py +++ b/materials/02-Using-gem5/01-stdlib/completed/02-processor.py @@ -58,14 +58,14 @@ def __init__(self, width, rob_size, num_int_regs, num_fp_regs): super().__init__(cores) -main_memory = SingleChannelDDR4_2400(size="2GB") +main_memory = SingleChannelDDR4_2400(size="2GiB") cache_hierarchy = MESITwoLevelCacheHierarchy( - l1d_size="16kB", + l1d_size="16KiB", l1d_assoc=8, - l1i_size="16kB", + l1i_size="16KiB", l1i_assoc=8, - l2_size="256kB", + l2_size="256KiB", l2_assoc=16, num_l2_banks=1, ) diff --git a/materials/02-Using-gem5/02-gem5-resources/01-hello-example.py b/materials/02-Using-gem5/02-gem5-resources/01-hello-example.py index 5e32c6e..b28d7d4 100644 --- a/materials/02-Using-gem5/02-gem5-resources/01-hello-example.py +++ b/materials/02-Using-gem5/02-gem5-resources/01-hello-example.py @@ -23,7 +23,7 @@ from gem5.simulate.simulator import Simulator -memory = SingleChannelDDR4_2400(size="2GB") +memory = SingleChannelDDR4_2400(size="2GiB") caches = PrivateL1SharedL2CacheHierarchy( l1d_size="32KiB", diff --git a/materials/02-Using-gem5/02-gem5-resources/02-suite-workload-example.py b/materials/02-Using-gem5/02-gem5-resources/02-suite-workload-example.py index 4058375..47a85ac 100644 --- a/materials/02-Using-gem5/02-gem5-resources/02-suite-workload-example.py +++ b/materials/02-Using-gem5/02-gem5-resources/02-suite-workload-example.py @@ -28,7 +28,7 @@ from gem5.simulate.simulator import Simulator -memory = SingleChannelDDR4_2400(size="2GB") +memory = SingleChannelDDR4_2400(size="2GiB") caches = 
PrivateL1SharedL2CacheHierarchy( l1d_size="32KiB", diff --git a/materials/02-Using-gem5/02-gem5-resources/03-run-local-resource-local-path.py b/materials/02-Using-gem5/02-gem5-resources/03-run-local-resource-local-path.py index fe86737..f1ef5ce 100644 --- a/materials/02-Using-gem5/02-gem5-resources/03-run-local-resource-local-path.py +++ b/materials/02-Using-gem5/02-gem5-resources/03-run-local-resource-local-path.py @@ -16,12 +16,12 @@ cache_hierarchy = NoCache() # We use a single channel DDR3_1600 memory system -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") # We use a simple Timing processor with one core. processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1) -# The gem5 library simble board which can be used to run simple SE-mode +# The gem5 library simple board which can be used to run simple SE-mode # simulations. board = SimpleBoard( clk_freq="3GHz", diff --git a/materials/02-Using-gem5/02-gem5-resources/04-run-local-resource-json.py b/materials/02-Using-gem5/02-gem5-resources/04-run-local-resource-json.py index bf05fdb..6d944e0 100644 --- a/materials/02-Using-gem5/02-gem5-resources/04-run-local-resource-json.py +++ b/materials/02-Using-gem5/02-gem5-resources/04-run-local-resource-json.py @@ -16,12 +16,12 @@ cache_hierarchy = NoCache() # We use a single channel DDR3_1600 memory system -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") # We use a simple Timing processor with one core. processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1) -# The gem5 library simble board which can be used to run simple SE-mode +# The gem5 library simple board which can be used to run simple SE-mode # simulations. 
board = SimpleBoard( clk_freq="3GHz", diff --git a/materials/02-Using-gem5/02-gem5-resources/complete/01-hello-example.py b/materials/02-Using-gem5/02-gem5-resources/complete/01-hello-example.py index 6f7bbc3..657f166 100644 --- a/materials/02-Using-gem5/02-gem5-resources/complete/01-hello-example.py +++ b/materials/02-Using-gem5/02-gem5-resources/complete/01-hello-example.py @@ -22,7 +22,7 @@ from gem5.simulate.simulator import Simulator -memory = SingleChannelDDR4_2400(size="2GB") +memory = SingleChannelDDR4_2400(size="2GiB") caches = PrivateL1SharedL2CacheHierarchy( l1d_size="32KiB", diff --git a/materials/02-Using-gem5/02-gem5-resources/complete/02-suite-workload-example.py b/materials/02-Using-gem5/02-gem5-resources/complete/02-suite-workload-example.py index 86f873a..ef9efe1 100644 --- a/materials/02-Using-gem5/02-gem5-resources/complete/02-suite-workload-example.py +++ b/materials/02-Using-gem5/02-gem5-resources/complete/02-suite-workload-example.py @@ -28,7 +28,7 @@ from gem5.simulate.simulator import Simulator -memory = SingleChannelDDR4_2400(size="2GB") +memory = SingleChannelDDR4_2400(size="2GiB") caches = PrivateL1SharedL2CacheHierarchy( l1d_size="32KiB", diff --git a/materials/02-Using-gem5/02-gem5-resources/complete/03-run-local-resource-local-path.py b/materials/02-Using-gem5/02-gem5-resources/complete/03-run-local-resource-local-path.py index 98e7419..b5d41f5 100644 --- a/materials/02-Using-gem5/02-gem5-resources/complete/03-run-local-resource-local-path.py +++ b/materials/02-Using-gem5/02-gem5-resources/complete/03-run-local-resource-local-path.py @@ -16,12 +16,12 @@ cache_hierarchy = NoCache() # We use a single channel DDR3_1600 memory system -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") # We use a simple Timing processor with one core. 
processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1) -# The gem5 library simble board which can be used to run simple SE-mode +# The gem5 library simple board which can be used to run simple SE-mode # simulations. board = SimpleBoard( clk_freq="3GHz", diff --git a/materials/02-Using-gem5/02-gem5-resources/complete/04-run-local-resource-json.py b/materials/02-Using-gem5/02-gem5-resources/complete/04-run-local-resource-json.py index 25195ed..13983e5 100644 --- a/materials/02-Using-gem5/02-gem5-resources/complete/04-run-local-resource-json.py +++ b/materials/02-Using-gem5/02-gem5-resources/complete/04-run-local-resource-json.py @@ -16,12 +16,12 @@ cache_hierarchy = NoCache() # We use a single channel DDR3_1600 memory system -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") # We use a simple Timing processor with one core. processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1) -# The gem5 library simble board which can be used to run simple SE-mode +# The gem5 library simple board which can be used to run simple SE-mode # simulations. 
board = SimpleBoard( clk_freq="3GHz", diff --git a/materials/02-Using-gem5/03-running-in-gem5/00-SE-hello-world/00-SE-hello-world.py b/materials/02-Using-gem5/03-running-in-gem5/00-SE-hello-world/00-SE-hello-world.py index ee175b1..7c18cb5 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/00-SE-hello-world/00-SE-hello-world.py +++ b/materials/02-Using-gem5/03-running-in-gem5/00-SE-hello-world/00-SE-hello-world.py @@ -52,11 +52,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/03-run-x86-SE.py b/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/03-run-x86-SE.py index 1a43ee3..fefc7c8 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/03-run-x86-SE.py +++ b/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/03-run-x86-SE.py @@ -52,11 +52,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/complete/03-run-x86-SE.py b/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/complete/03-run-x86-SE.py index 85574e8..44644b8 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/complete/03-run-x86-SE.py +++ b/materials/02-Using-gem5/03-running-in-gem5/03-run-x86-SE/complete/03-run-x86-SE.py @@ -52,11 +52,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = 
CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/05-run-arm-SE.py b/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/05-run-arm-SE.py index 5fe0516..d0bf8d4 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/05-run-arm-SE.py +++ b/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/05-run-arm-SE.py @@ -77,11 +77,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/complete/05-run-arm-SE.py b/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/complete/05-run-arm-SE.py index 1ef3bf7..04954b0 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/complete/05-run-arm-SE.py +++ b/materials/02-Using-gem5/03-running-in-gem5/05-run-arm-SE/complete/05-run-arm-SE.py @@ -77,11 +77,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-1-linear-gen/simple-traffic-generators.py b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-1-linear-gen/simple-traffic-generators.py index d848a46..61972c6 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-1-linear-gen/simple-traffic-generators.py +++ b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-1-linear-gen/simple-traffic-generators.py @@ -43,7 +43,7 @@ memory = SingleChannelDDR3_1600() -generator = LinearGenerator(num_cores=1, rate="1GB/s") +generator = LinearGenerator(num_cores=1, 
rate="1GiB/s") motherboard = TestBoard( clk_freq="3GHz", diff --git a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-2-hybrid-gen/components/hybrid_generator.py b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-2-hybrid-gen/components/hybrid_generator.py index 11694a7..edf9963 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-2-hybrid-gen/components/hybrid_generator.py +++ b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/completed/step-2-hybrid-gen/components/hybrid_generator.py @@ -46,7 +46,7 @@ def __init__( self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/components/hybrid_generator.py b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/components/hybrid_generator.py index 7a1bf9b..5ff2a4a 100644 --- a/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/components/hybrid_generator.py +++ b/materials/02-Using-gem5/03-running-in-gem5/06-traffic-gen/components/hybrid_generator.py @@ -46,7 +46,7 @@ def __init__( self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/materials/02-Using-gem5/05-cache-hierarchies/completed/three_level.py b/materials/02-Using-gem5/05-cache-hierarchies/completed/three_level.py index 150aa63..521da75 100644 --- a/materials/02-Using-gem5/05-cache-hierarchies/completed/three_level.py +++ b/materials/02-Using-gem5/05-cache-hierarchies/completed/three_level.py @@ -147,7 +147,7 @@ def _setup_io_cache(self, board: AbstractBoard) -> None: data_latency=50, response_latency=50, mshrs=20, - size="1kB", + size="1KiB", tgts_per_mshr=12, addr_ranges=board.mem_ranges, ) diff --git a/materials/02-Using-gem5/05-cache-hierarchies/test-cache.py 
b/materials/02-Using-gem5/05-cache-hierarchies/test-cache.py index 0fa80f0..2d1aeaa 100644 --- a/materials/02-Using-gem5/05-cache-hierarchies/test-cache.py +++ b/materials/02-Using-gem5/05-cache-hierarchies/test-cache.py @@ -25,7 +25,7 @@ l3_size="2MiB", l3_assoc=32, ), - memory=DualChannelDDR4_2400(size="2GB"), + memory=DualChannelDDR4_2400(size="2GiB"), clk_freq="3GHz", ) diff --git a/materials/02-Using-gem5/05-cache-hierarchies/three_level.py b/materials/02-Using-gem5/05-cache-hierarchies/three_level.py index a4fe166..0dc4667 100644 --- a/materials/02-Using-gem5/05-cache-hierarchies/three_level.py +++ b/materials/02-Using-gem5/05-cache-hierarchies/three_level.py @@ -96,7 +96,7 @@ def _setup_io_cache(self, board: AbstractBoard) -> None: data_latency=50, response_latency=50, mshrs=20, - size="1kB", + size="1KiB", tgts_per_mshr=12, addr_ranges=board.mem_ranges, ) diff --git a/materials/02-Using-gem5/06-memory/blank_memory.py b/materials/02-Using-gem5/06-memory/blank_memory.py index 470c34d..ae8c469 100644 --- a/materials/02-Using-gem5/06-memory/blank_memory.py +++ b/materials/02-Using-gem5/06-memory/blank_memory.py @@ -43,7 +43,7 @@ def createLinearTraffic(tgen): # Set up the system system.mem_mode = 'timing' -system.mem_ranges = [AddrRange('512MB')] # Create an address range +system.mem_ranges = [AddrRange('512MiB')] # Create an address range addr_range = system.mem_ranges[0] system.tgen = PyTrafficGen() # Create a traffic generator diff --git a/materials/02-Using-gem5/06-memory/comm_monitor.py b/materials/02-Using-gem5/06-memory/comm_monitor.py index 7b29a71..76bc1e4 100644 --- a/materials/02-Using-gem5/06-memory/comm_monitor.py +++ b/materials/02-Using-gem5/06-memory/comm_monitor.py @@ -40,13 +40,13 @@ def createLinearTraffic(tgen): # Set up the system system.mem_mode = 'timing' -system.mem_ranges = [AddrRange('512MB')] # Create an address range +system.mem_ranges = [AddrRange('512MiB')] # Create an address range addr_range = system.mem_ranges[0] system.tgen = 
PyTrafficGen() # Create a traffic generator system.l1cache = SimpleCache() -system.l1cache.size = '32kB' +system.l1cache.size = '32KiB' system.membus = SystemXBar(width = 64, max_routing_table_size = 16777216) @@ -59,7 +59,7 @@ def createLinearTraffic(tgen): # memory interface parameters system.mem_ctrl.dram = DDR4_2400_16x4() -system.mem_ctrl.dram.range = AddrRange('512MB') +system.mem_ctrl.dram.range = AddrRange('512MiB') system.mem_ctrl.dram.read_buffer_size = 32 system.mem_ctrl.dram.write_buffer_size = 64 diff --git a/materials/02-Using-gem5/06-memory/completed/blank_memory.py b/materials/02-Using-gem5/06-memory/completed/blank_memory.py index 3401245..00309e5 100644 --- a/materials/02-Using-gem5/06-memory/completed/blank_memory.py +++ b/materials/02-Using-gem5/06-memory/completed/blank_memory.py @@ -41,7 +41,7 @@ def createLinearTraffic(tgen): # Set up the system system.mem_mode = 'timing' -system.mem_ranges = [AddrRange('512MB')] # Create an address range +system.mem_ranges = [AddrRange('512MiB')] # Create an address range addr_range = system.mem_ranges[0] system.tgen = PyTrafficGen() # Create a traffic generator @@ -58,7 +58,7 @@ def createLinearTraffic(tgen): # memory interface parameters system.mem_ctrl.dram = DDR4_2400_16x4() -system.mem_ctrl.dram.range = AddrRange('512MB') +system.mem_ctrl.dram.range = AddrRange('512MiB') system.mem_ctrl.dram.read_buffer_size = 32 system.mem_ctrl.dram.write_buffer_size = 64 diff --git a/materials/02-Using-gem5/06-memory/completed/comm_monitor.py b/materials/02-Using-gem5/06-memory/completed/comm_monitor.py index fed3c99..e8c2053 100644 --- a/materials/02-Using-gem5/06-memory/completed/comm_monitor.py +++ b/materials/02-Using-gem5/06-memory/completed/comm_monitor.py @@ -40,7 +40,7 @@ def createLinearTraffic(tgen): # Set up the system system.mem_mode = 'timing' -system.mem_ranges = [AddrRange('512MB')] # Create an address range +system.mem_ranges = [AddrRange('512MiB')] # Create an address range addr_range = 
system.mem_ranges[0] system.tgen = PyTrafficGen() # Create a traffic generator @@ -48,7 +48,7 @@ def createLinearTraffic(tgen): system.membus = SystemXBar(width = 64, max_routing_table_size = 16777216) system.l1cache = SimpleCache() -system.l1cache.size = '32kB' +system.l1cache.size = '32KiB' system.tgen.port = system.l1cache.cpu_side # system.l2cache.mem_side = system.membus.cpu_side_ports @@ -59,10 +59,10 @@ def createLinearTraffic(tgen): # memory interface parameters system.mem_ctrl.dram = DDR4_2400_16x4() -system.mem_ctrl.dram.range = AddrRange('512MB') +system.mem_ctrl.dram.range = AddrRange('512MiB') system.mem_ctrl.dram.read_buffer_size = 32 system.mem_ctrl.dram.write_buffer_size = 64 -system.mem_ctrl.dram.device_size = '512MB' +system.mem_ctrl.dram.device_size = '512MiB' system.mem_ctrl.port = system.membus.mem_side_ports diff --git a/materials/02-Using-gem5/07-full-system/completed/x86-fs-gapbs-kvm-run.py b/materials/02-Using-gem5/07-full-system/completed/x86-fs-gapbs-kvm-run.py index e211889..5721489 100644 --- a/materials/02-Using-gem5/07-full-system/completed/x86-fs-gapbs-kvm-run.py +++ b/materials/02-Using-gem5/07-full-system/completed/x86-fs-gapbs-kvm-run.py @@ -24,11 +24,11 @@ # Here we setup a MESI Two Level Cache Hierarchy. cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="16kB", l1i_size="16kB", l2_size="256kB" + l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB" ) # Setup the system memory. -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") # Here we setup the processor. processor = SimpleProcessor( diff --git a/materials/02-Using-gem5/07-full-system/completed/x86-fs-kvm-run.py b/materials/02-Using-gem5/07-full-system/completed/x86-fs-kvm-run.py index 468186c..e4f08dc 100644 --- a/materials/02-Using-gem5/07-full-system/completed/x86-fs-kvm-run.py +++ b/materials/02-Using-gem5/07-full-system/completed/x86-fs-kvm-run.py @@ -24,11 +24,11 @@ # Here we setup a MESI Two Level Cache Hierarchy. 
cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="16kB", l1i_size="16kB", l2_size="256kB" + l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB" ) # Setup the system memory. -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") # Here we setup the processor. processor = SimpleProcessor( diff --git a/materials/02-Using-gem5/07-full-system/x86-fs-gapbs-kvm-run.py b/materials/02-Using-gem5/07-full-system/x86-fs-gapbs-kvm-run.py index e211889..5721489 100644 --- a/materials/02-Using-gem5/07-full-system/x86-fs-gapbs-kvm-run.py +++ b/materials/02-Using-gem5/07-full-system/x86-fs-gapbs-kvm-run.py @@ -24,11 +24,11 @@ # Here we setup a MESI Two Level Cache Hierarchy. cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="16kB", l1i_size="16kB", l2_size="256kB" + l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB" ) # Setup the system memory. -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") # Here we setup the processor. processor = SimpleProcessor( diff --git a/materials/02-Using-gem5/07-full-system/x86-fs-kvm-run.py b/materials/02-Using-gem5/07-full-system/x86-fs-kvm-run.py index 3c3d979..13765d5 100644 --- a/materials/02-Using-gem5/07-full-system/x86-fs-kvm-run.py +++ b/materials/02-Using-gem5/07-full-system/x86-fs-kvm-run.py @@ -24,11 +24,11 @@ # Here we setup a MESI Two Level Cache Hierarchy. cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="16kB", l1i_size="16kB", l2_size="256kB" + l1d_size="16KiB", l1i_size="16KiB", l2_size="256KiB" ) # Setup the system memory. -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") # Here we setup the processor. 
processor = SimpleProcessor( diff --git a/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/02-kvm-time.py b/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/02-kvm-time.py index 8b944cf..57ab218 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/02-kvm-time.py +++ b/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/02-kvm-time.py @@ -58,11 +58,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="32kB", - l1i_size="32kB" + l1d_size="32KiB", + l1i_size="32KiB" ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") # Here we set up the processor. The SimpleSwitchableProcessor allows for # switching between different CPU types during simulation, such as KVM to Timing diff --git a/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/complete/02-kvm-time.py b/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/complete/02-kvm-time.py index e0d5e06..8f4e7dd 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/complete/02-kvm-time.py +++ b/materials/02-Using-gem5/08-accelerating-simulation/02-kvm-time/complete/02-kvm-time.py @@ -57,11 +57,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="32kB", - l1i_size="32kB" + l1d_size="32KiB", + l1i_size="32KiB" ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") # Here we setup the processor. 
The SimpleSwitchableProcessor allows for # switching between different CPU types during simulation, such as KVM to Timing diff --git a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-restore-the-checkpoint.py b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-restore-the-checkpoint.py index af96fcb..c5b78c3 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-restore-the-checkpoint.py +++ b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-restore-the-checkpoint.py @@ -56,11 +56,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="32kB", - l1i_size="32kB" + l1d_size="32KiB", + l1i_size="32KiB" ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.TIMING, diff --git a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-take-a-checkpoint.py b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-take-a-checkpoint.py index 568e4f1..5af977f 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-take-a-checkpoint.py +++ b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/03-take-a-checkpoint.py @@ -53,7 +53,7 @@ # -# Let's set up a SingleChannelDDR4_2400 memory with 3GB size +# Let's set up a SingleChannelDDR4_2400 memory with 3GiB size # diff --git a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-restore-the-checkpoint.py b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-restore-the-checkpoint.py index 6d2e2c8..d6e83a8 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-restore-the-checkpoint.py +++ 
b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-restore-the-checkpoint.py @@ -56,11 +56,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="32kB", - l1i_size="32kB" + l1d_size="32KiB", + l1i_size="32KiB" ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") # Let's setup a simple processor with the TIMING CPU processor = SimpleProcessor( diff --git a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-take-a-checkpoint.py b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-take-a-checkpoint.py index 0354c08..0114117 100644 --- a/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-take-a-checkpoint.py +++ b/materials/02-Using-gem5/08-accelerating-simulation/03-checkpoint-and-restore/complete/03-take-a-checkpoint.py @@ -54,9 +54,9 @@ cache_hierarchy = NoCache() # -# Let's setup a SingleChannelDDR4_2400 memory with 3GB size +# Let's setup a SingleChannelDDR4_2400 memory with 3GiB size from gem5.components.memory.single_channel import SingleChannelDDR4_2400 -memory = SingleChannelDDR4_2400(size="3GB") +memory = SingleChannelDDR4_2400(size="3GiB") # # Here we setup a simple processor with the KVM CPU diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/full-detailed-run.py b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/full-detailed-run.py index 72e8ac4..8eb811a 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/full-detailed-run.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/full-detailed-run.py @@ -50,12 +50,12 @@ requires(isa_required=ISA.X86) cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") 
processor = SimpleProcessor( cpu_type=CPUTypes.O3, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-analysis.py b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-analysis.py index 02a265c..7bff75a 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-analysis.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-analysis.py @@ -49,7 +49,7 @@ cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-checkpiont.py b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-checkpiont.py index 68da49f..3b75d78 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-checkpiont.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-checkpiont.py @@ -52,7 +52,7 @@ cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-run.py b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-run.py index 8ebc48f..620bb97 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-run.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/complete/simpoint-run.py @@ -59,12 +59,12 @@ args = parser.parse_args() cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.O3, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/full-detailed-run.py 
b/materials/02-Using-gem5/09-sampling/01-simpoint/full-detailed-run.py index 72e8ac4..8eb811a 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/full-detailed-run.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/full-detailed-run.py @@ -50,12 +50,12 @@ requires(isa_required=ISA.X86) cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.O3, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-analysis.py b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-analysis.py index 02a265c..7bff75a 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-analysis.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-analysis.py @@ -49,7 +49,7 @@ cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-checkpoint.py b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-checkpoint.py index 68da49f..3b75d78 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-checkpoint.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-checkpoint.py @@ -52,7 +52,7 @@ cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="3GB") +memory = SingleChannelDDR3_1600(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-run.py b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-run.py index 8ebc48f..620bb97 100644 --- a/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-run.py +++ b/materials/02-Using-gem5/09-sampling/01-simpoint/simpoint-run.py @@ -59,12 
+59,12 @@ args = parser.parse_args() cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleProcessor( cpu_type=CPUTypes.O3, diff --git a/materials/02-Using-gem5/09-sampling/02-elfies/complete/run-elfies.py b/materials/02-Using-gem5/09-sampling/02-elfies/complete/run-elfies.py index 93692c5..650cc3d 100644 --- a/materials/02-Using-gem5/09-sampling/02-elfies/complete/run-elfies.py +++ b/materials/02-Using-gem5/09-sampling/02-elfies/complete/run-elfies.py @@ -56,7 +56,7 @@ l2_assoc=16 ) -memory = DualChannelDDR4_2400("3GB") +memory = DualChannelDDR4_2400("3GiB") processor = SimpleProcessor( # This processor uses a simple timing CPU with 8 cores diff --git a/materials/02-Using-gem5/09-sampling/02-elfies/run-elfies.py b/materials/02-Using-gem5/09-sampling/02-elfies/run-elfies.py index 93692c5..650cc3d 100644 --- a/materials/02-Using-gem5/09-sampling/02-elfies/run-elfies.py +++ b/materials/02-Using-gem5/09-sampling/02-elfies/run-elfies.py @@ -56,7 +56,7 @@ l2_assoc=16 ) -memory = DualChannelDDR4_2400("3GB") +memory = DualChannelDDR4_2400("3GiB") processor = SimpleProcessor( # This processor uses a simple timing CPU with 8 cores diff --git a/materials/02-Using-gem5/09-sampling/03-SMARTS/SMARTS.py b/materials/02-Using-gem5/09-sampling/03-SMARTS/SMARTS.py index 332f5f9..590824b 100644 --- a/materials/02-Using-gem5/09-sampling/03-SMARTS/SMARTS.py +++ b/materials/02-Using-gem5/09-sampling/03-SMARTS/SMARTS.py @@ -54,12 +54,12 @@ requires(isa_required=ISA.X86) cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleSwitchableProcessor( 
starting_core_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/09-sampling/03-SMARTS/complete/SMARTS.py b/materials/02-Using-gem5/09-sampling/03-SMARTS/complete/SMARTS.py index 332f5f9..590824b 100644 --- a/materials/02-Using-gem5/09-sampling/03-SMARTS/complete/SMARTS.py +++ b/materials/02-Using-gem5/09-sampling/03-SMARTS/complete/SMARTS.py @@ -54,12 +54,12 @@ requires(isa_required=ISA.X86) cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( - l1d_size="32kB", - l1i_size="32kB", - l2_size="256kB", + l1d_size="32KiB", + l1i_size="32KiB", + l2_size="256KiB", ) -memory = DualChannelDDR4_2400(size="3GB") +memory = DualChannelDDR4_2400(size="3GiB") processor = SimpleSwitchableProcessor( starting_core_type=CPUTypes.ATOMIC, diff --git a/materials/02-Using-gem5/10-modeling-power/completed/three_level.py b/materials/02-Using-gem5/10-modeling-power/completed/three_level.py index 06cd060..1bf8e97 100644 --- a/materials/02-Using-gem5/10-modeling-power/completed/three_level.py +++ b/materials/02-Using-gem5/10-modeling-power/completed/three_level.py @@ -147,7 +147,7 @@ def _setup_io_cache(self, board: AbstractBoard) -> None: data_latency=50, response_latency=50, mshrs=20, - size="1kB", + size="1KiB", tgts_per_mshr=12, addr_ranges=board.mem_ranges, ) diff --git a/materials/02-Using-gem5/10-modeling-power/test-cache.py b/materials/02-Using-gem5/10-modeling-power/test-cache.py index 3671d94..8cc4b0e 100644 --- a/materials/02-Using-gem5/10-modeling-power/test-cache.py +++ b/materials/02-Using-gem5/10-modeling-power/test-cache.py @@ -48,7 +48,7 @@ def _instantiate(simulator): l3_size="2MiB", l3_assoc=32, ), - memory=DualChannelDDR4_2400(size="2GB"), + memory=DualChannelDDR4_2400(size="2GiB"), clk_freq="3GHz", ) diff --git a/materials/02-Using-gem5/10-modeling-power/three_level.py b/materials/02-Using-gem5/10-modeling-power/three_level.py index 5c87fe1..3c4b9ca 100644 --- a/materials/02-Using-gem5/10-modeling-power/three_level.py +++ 
b/materials/02-Using-gem5/10-modeling-power/three_level.py @@ -147,7 +147,7 @@ def _setup_io_cache(self, board: AbstractBoard) -> None: data_latency=50, response_latency=50, mshrs=20, - size="1kB", + size="1KiB", tgts_per_mshr=12, addr_ranges=board.mem_ranges, ) diff --git a/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/experiment.py b/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/experiment.py index 5605261..36a4348 100644 --- a/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/experiment.py +++ b/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/experiment.py @@ -34,7 +34,7 @@ l1i_size=args.instruction_cache ) -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1) diff --git a/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/run-experiment.sh b/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/run-experiment.sh index d789424..182500a 100755 --- a/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/run-experiment.sh +++ b/materials/02-Using-gem5/11-multisim/01-multiprocessing-via-script/run-experiment.sh @@ -15,7 +15,7 @@ # ./run-experiment.sh # -gem5 -re -d experiment_1 experiment.py "8kB" "8kB" && \ -gem5 -re -d experiment_2 experiment.py "16kB" "8kB" && \ -gem5 -re -d experiment_3 experiment.py "8kB" "16kB" && \ -gem5 -re -d experiment_4 experiment.py "16kB" "16kB" +gem5 -re -d experiment_1 experiment.py "8KiB" "8KiB" && \ +gem5 -re -d experiment_2 experiment.py "16KiB" "8KiB" && \ +gem5 -re -d experiment_3 experiment.py "8KiB" "16KiB" && \ +gem5 -re -d experiment_4 experiment.py "16KiB" "16KiB" diff --git a/materials/02-Using-gem5/11-multisim/02-multiprocessing-via-multisim/multisim-experiment.py b/materials/02-Using-gem5/11-multisim/02-multiprocessing-via-multisim/multisim-experiment.py index 2f80347..1cb2ff2 100644 --- 
a/materials/02-Using-gem5/11-multisim/02-multiprocessing-via-multisim/multisim-experiment.py +++ b/materials/02-Using-gem5/11-multisim/02-multiprocessing-via-multisim/multisim-experiment.py @@ -33,7 +33,7 @@ l1i_size="TODO", ) -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1) diff --git a/materials/02-Using-gem5/11-multisim/completed/02-multiprocessing-via-multisim/multisim-experiment.py b/materials/02-Using-gem5/11-multisim/completed/02-multiprocessing-via-multisim/multisim-experiment.py index 1372fa6..feaae2a 100644 --- a/materials/02-Using-gem5/11-multisim/completed/02-multiprocessing-via-multisim/multisim-experiment.py +++ b/materials/02-Using-gem5/11-multisim/completed/02-multiprocessing-via-multisim/multisim-experiment.py @@ -22,15 +22,15 @@ multisim.set_num_processes(2) -for data_cache_size in ["8kB","16kB"]: - for instruction_cache_size in ["8kB","16kB"]: +for data_cache_size in ["8KiB","16KiB"]: + for instruction_cache_size in ["8KiB","16KiB"]: cache_hierarchy = PrivateL1CacheHierarchy( l1d_size=data_cache_size, l1i_size=instruction_cache_size, ) - memory = SingleChannelDDR3_1600(size="32MB") + memory = SingleChannelDDR3_1600(size="32MiB") processor = SimpleProcessor( cpu_type=CPUTypes.TIMING, diff --git a/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/components/hybrid_generator.py b/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/components/hybrid_generator.py index 11694a7..edf9963 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/components/hybrid_generator.py +++ b/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/components/hybrid_generator.py @@ -46,7 +46,7 @@ def __init__( self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", 
block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py b/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py index 4078be8..809182e 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py +++ b/materials/03-Developing-gem5-models/04-ports/step-1/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py @@ -44,7 +44,7 @@ ## memory = ? generator = HybridGenerator( - num_cores=6, rate="1GB/s", duration="1ms", data_limit=128 + num_cores=6, rate="1GiB/s", duration="1ms", data_limit=128 ) motherboard = TestBoard( diff --git a/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/components/hybrid_generator.py b/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/components/hybrid_generator.py index 11694a7..edf9963 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/components/hybrid_generator.py +++ b/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/components/hybrid_generator.py @@ -46,7 +46,7 @@ def __init__( self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py b/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py index 1b9f283..ef8032f 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py +++ 
b/materials/03-Developing-gem5-models/04-ports/step-2/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py @@ -52,7 +52,7 @@ generator = HybridGenerator( num_cores=6, - rate="1GB/s", + rate="1GiB/s", duration="1ms", ) diff --git a/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/components/hybrid_generator.py b/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/components/hybrid_generator.py index 11694a7..edf9963 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/components/hybrid_generator.py +++ b/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/components/hybrid_generator.py @@ -46,7 +46,7 @@ def __init__( self, num_cores: int = 2, duration: str = "1ms", - rate: str = "1GB/s", + rate: str = "1GiB/s", block_size: int = 8, min_addr: int = 0, max_addr: int = 131072, diff --git a/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py b/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py index 0aea7f6..7ac3508 100644 --- a/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py +++ b/materials/03-Developing-gem5-models/04-ports/step-3/configs/bootcamp/inspector-gadget/first-inspector-gadget-example.py @@ -53,7 +53,7 @@ generator = HybridGenerator( num_cores=4, - rate="1GB/s", + rate="1GiB/s", duration="1ms", ) diff --git a/materials/03-Developing-gem5-models/07-chi-protocol/completed/run-test.py b/materials/03-Developing-gem5-models/07-chi-protocol/completed/run-test.py index 8588f25..c90da68 100644 --- a/materials/03-Developing-gem5-models/07-chi-protocol/completed/run-test.py +++ b/materials/03-Developing-gem5-models/07-chi-protocol/completed/run-test.py @@ -21,7 +21,7 @@ l2_size="2MiB", l2_assoc=16, 
), - memory=SingleChannelDDR4_2400(size="2GB"), + memory=SingleChannelDDR4_2400(size="2GiB"), clk_freq="3GHz", ) diff --git a/materials/03-Developing-gem5-models/08-ruby-network/run-test.py b/materials/03-Developing-gem5-models/08-ruby-network/run-test.py index 416ed37..6ef3322 100644 --- a/materials/03-Developing-gem5-models/08-ruby-network/run-test.py +++ b/materials/03-Developing-gem5-models/08-ruby-network/run-test.py @@ -21,7 +21,7 @@ l2_size="2MiB", l2_assoc=16, ), - memory=DualChannelDDR4_2400(size="2GB"), + memory=DualChannelDDR4_2400(size="2GiB"), clk_freq="3GHz", ) diff --git a/materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py b/materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py index b6ccc33..1ba8d72 100644 --- a/materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py +++ b/materials/03-Developing-gem5-models/09-extending-gem5-models/01-local-inst-tracker/simple-sim.py @@ -53,11 +53,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = SimpleProcessor( cpu_type = CPUTypes.ATOMIC, diff --git a/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py b/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py index 4a03e76..e61e915 100644 --- a/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py +++ b/materials/03-Developing-gem5-models/09-extending-gem5-models/02-global-inst-tracker/simple-sim.py @@ -55,11 +55,11 @@ cache_hierarchy = PrivateL1CacheHierarchy( - l1d_size="64kB", - l1i_size="64kB", + l1d_size="64KiB", + l1i_size="64KiB", ) -memory = SingleChannelDDR4_2400("1GB") +memory = SingleChannelDDR4_2400("1GiB") processor = 
SimpleProcessor( cpu_type = CPUTypes.TIMING, diff --git a/materials/05-Other-simulators/02-dram/dramsim-example.py b/materials/05-Other-simulators/02-dram/dramsim-example.py index d90d2c2..4e1aab6 100644 --- a/materials/05-Other-simulators/02-dram/dramsim-example.py +++ b/materials/05-Other-simulators/02-dram/dramsim-example.py @@ -13,7 +13,7 @@ generator = LinearGenerator( duration="250us", - rate="40GB/s", + rate="40GiB/s", num_cores=1, max_addr=memory.get_size(), ) diff --git a/materials/05-Other-simulators/02-dram/dramsys-example.py b/materials/05-Other-simulators/02-dram/dramsys-example.py index c09e729..adb0e4b 100644 --- a/materials/05-Other-simulators/02-dram/dramsys-example.py +++ b/materials/05-Other-simulators/02-dram/dramsys-example.py @@ -16,12 +16,12 @@ configuration="/workspaces/2024/gem5/ext/dramsys/DRAMSys/configs/ddr4-example.json", recordable=True, resource_directory="/workspaces/2024/gem5/ext/dramsys/DRAMSys/configs", - size="4GB", + size="4GiB", ) generator = LinearGenerator( duration="250us", - rate="40GB/s", + rate="40GiB/s", num_cores=1, max_addr=memory.get_size(), ) diff --git a/materials/06-Contributing/02-testing/01-testlib-example/completed/example_config.py b/materials/06-Contributing/02-testing/01-testlib-example/completed/example_config.py index 14d4ea2..d53bbc7 100644 --- a/materials/06-Contributing/02-testing/01-testlib-example/completed/example_config.py +++ b/materials/06-Contributing/02-testing/01-testlib-example/completed/example_config.py @@ -20,7 +20,7 @@ requires(isa_required=ISA.ARM) cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1) parser = argparse.ArgumentParser() diff --git a/materials/06-Contributing/02-testing/01-testlib-example/example_config.py b/materials/06-Contributing/02-testing/01-testlib-example/example_config.py index 14d4ea2..d53bbc7 100644 --- 
a/materials/06-Contributing/02-testing/01-testlib-example/example_config.py +++ b/materials/06-Contributing/02-testing/01-testlib-example/example_config.py @@ -20,7 +20,7 @@ requires(isa_required=ISA.ARM) cache_hierarchy = NoCache() -memory = SingleChannelDDR3_1600(size="32MB") +memory = SingleChannelDDR3_1600(size="32MiB") processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1) parser = argparse.ArgumentParser()