From ce1f54dc7a167a6e389d9550fbcc7ea7b0f55c6d Mon Sep 17 00:00:00 2001 From: Kevin Date: Tue, 18 Nov 2025 12:45:23 +0000 Subject: [PATCH 1/2] Add overview schematic --- docs/images/overview.svg | 612 ++++++++++++++++++++++++++++++++ docs/user-guide/introduction.md | 85 +++-- mkdocs.yml | 2 + 3 files changed, 678 insertions(+), 21 deletions(-) create mode 100644 docs/images/overview.svg diff --git a/docs/images/overview.svg b/docs/images/overview.svg new file mode 100644 index 0000000..456ff9a --- /dev/null +++ b/docs/images/overview.svg @@ -0,0 +1,612 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /home + + + + + + + + + + + + + + /work + + + + + + login.cirrus.ac.uk + + + + + + + + ln01 + ln02 + ln03 + + ln04 + + FRONT END or LOGIN NODES + + BACK END or COMPUTE NODES + + + standard partition + cn0000-0063 + cn0064-0127 + cn0128-0191 + + cn0192-0255 + highmem partition + + + + + + + SLURM + + + ssh + + USER + + + + + + + diff --git a/docs/user-guide/introduction.md b/docs/user-guide/introduction.md index 026c4c0..5fc24aa 100644 --- a/docs/user-guide/introduction.md +++ b/docs/user-guide/introduction.md @@ -1,34 +1,77 @@ # Introduction -This guide is designed to be a reference for users of the -high-performance computing (HPC) facility: Cirrus. It provides all the -information needed to access the system, transfer data, manage your -resources (disk and compute time), submit jobs, compile programs and -manage your environment. +The Cirrus EX4000 system was installed in Q4 2025. The underlying technology +is supplied by HPE Cray and is based on AMD 9005 series processors. The system +runs a version of Red Hat Enterprise Linux (RHEL). -## Acknowledging Cirrus -You should use the following phrase to acknowledge Cirrus in all -research outputs that have used the facility: +## Overview of the Cirrus system -*This work used the Cirrus UK National Tier-2 HPC Service at EPCC -(http://www.cirrus.ac.uk) funded by the University of Edinburgh and -EPSRC (EP/P020267/1)* +
+ ![Overview of the Cirrus system](/images/overview.svg){ width="90%" } +
A schematic of the Cirrus system, where users log in to
+ the front end nodes, while work on the back end is managed by SLURM.
+
+
+
+There are four front end or login nodes which are intended for interactive
+access and lightweight pre-processing and post-processing work. The front
+end nodes use AMD EPYC 9745 processors (two 128-core processors per node),
+each node with a total of 1.5 TB of memory.
+
+The SLURM workload manager provides access to a total of 256 back end or
+compute nodes. All compute nodes have two 144-core AMD 9825 processors (a
+total of 288 physical cores per node). There are 192 standard compute
+nodes with 768 GB DDR5 RAM per node, and 64 "high memory" nodes with
+1,536 GB per node. All the back end compute nodes are connected by the
+Slingshot 11 interconnect.
+
+The SLURM scheduler is also informally known as "the queue system",
+although SLURM itself does not have an explicit concept of queues.
+Work is submitted to _partitions_ with a given _quality of service_ (QoS).
+
+For further details of the compute node hardware and network, see the
+[hardware description](/user-guide/hardware).
 
-You should also tag outputs with the keyword *Cirrus* whenever possible.
+### Storage
 
-## Hardware
+Storage is provided by two file systems:
 
-Details of the Cirrus hardware are available on the Cirrus website:
+- A 1.0 PB HPE E1000 ClusterStor Lustre parallel file system mounted on both
+  the front end and compute nodes. It is also referred to as the work file
+  system (`/work`). The work file system _must_ be used for work submitted to
+  SLURM.
 
-- [Cirrus Hardware](https://www.cirrus.ac.uk/about/hardware-software/)
+- A 1.5 PB Ceph distributed file system mounted on the login nodes but
+  not the compute nodes. It is the location of users' home directories
+  (`/home`).
+  It follows that work submitted to SLURM must not reference users'
+  home directories.
 
-## Useful terminology
+For further storage details see
+[Data Management and Transfer](/user-guide/data).
 
-This is a list of terminology used throughout this guide and its
-meaning.
+## Charging
 
-- coreh: Cirrus CPU time is measured in CPUh. Each job you run on the service
-  consumes CPUhs from your budget. You can find out more about coreh and
-  how to track your usage in the [resource management section](../resource_management/)
+Cirrus is a CPU-only system and usage of the queue system (SLURM) is
+accounted for in "CPUh" (aka "core hours"). For example, a job requesting
+one node exclusively for one hour will be charged 288 CPUh (288 core hours)
+from the relevant budget if it completes in exactly 60 minutes. Jobs
+requesting less than a full node will be charged pro-rata according to
+the number of CPUs (cores) requested and time taken. Accounting takes
+place at job completion.
 
+Applications for access to the service should make estimates of computational
+resource requirements in these units. Requirements for disk usage should
+be made in GB or TB.
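+
+As a rough illustration of the pro-rata rule above, the charge is simply the
+number of cores requested multiplied by the wall-clock hours used. The sketch
+below is illustrative only (the helper name is made up for this example); the
+actual accounting is carried out by the service at job completion.
+
+```python
+# Illustrative sketch of the pro-rata charging rule described above.
+# This is not the accounting code used by the service.
+def core_hours_charged(cores_requested: int, runtime_seconds: float) -> float:
+    """Charge in core hours: cores requested multiplied by wall-clock hours used."""
+    return cores_requested * (runtime_seconds / 3600.0)
+
+# A full node (288 cores) for exactly one hour is charged 288 core hours.
+assert core_hours_charged(288, 3600) == 288.0
+
+# 36 cores for 30 minutes is charged 18 core hours pro-rata.
+assert core_hours_charged(36, 1800) == 18.0
+```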
+ + +## Acknowledging Cirrus + +Please use the following phrase to acknowledge Cirrus in all +research outputs that have used the facility: + +*This work used the Cirrus UK National Tier-2 HPC Service at EPCC +(http://www.cirrus.ac.uk) funded by the University of Edinburgh and +EPSRC.* diff --git a/mkdocs.yml b/mkdocs.yml index f63b992..582d927 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,8 @@ extra: markdown_extensions: - admonition + - attr_list + - md_in_html - pymdownx.details - pymdownx.superfences - pymdownx.tabbed: From 7b5a36b3558302010071c3b561a29a7345a3f507 Mon Sep 17 00:00:00 2001 From: Kevin Date: Thu, 20 Nov 2025 09:04:43 +0000 Subject: [PATCH 2/2] Updates following review --- docs/images/overview.svg | 64 +++++++++++++++------------------ docs/user-guide/introduction.md | 10 +++--- 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/docs/images/overview.svg b/docs/images/overview.svg index 456ff9a..1c0792c 100644 --- a/docs/images/overview.svg +++ b/docs/images/overview.svg @@ -25,12 +25,12 @@ inkscape:document-units="mm" showgrid="true" inkscape:zoom="0.60620403" - inkscape:cx="546.84559" + inkscape:cx="527.05027" inkscape:cy="371.16216" inkscape:window-width="1179" inkscape:window-height="768" - inkscape:window-x="282" - inkscape:window-y="105" + inkscape:window-x="654" + inkscape:window-y="249" inkscape:window-maximized="0" inkscape:current-layer="layer1"> ln01 + id="tspan538" + x="35.52409" + y="55.808826">login01 ln02 + id="tspan542" + x="35.553375" + y="76.975487">login02 ln03 - - ln04 - + id="tspan540" + x="83.106026" + y="55.808826">login03 + login04 diff --git a/docs/user-guide/introduction.md b/docs/user-guide/introduction.md index 5fc24aa..f4c0ec3 100644 --- a/docs/user-guide/introduction.md +++ b/docs/user-guide/introduction.md @@ -55,11 +55,12 @@ For further storage details see ## Charging Cirrus is a CPU-only system and usage of the queue system (SLURM) is -accounted for in "CPUh" (aka "core hours"). For example, a job requesting -one node exclusively for one hour will be charged 288 CPUh (288 core hours) +accounted for in core hours (referred to as "coreh" in SAFE). +For example, a job requesting +one node exclusively for one hour will be charged 288 core hours from the relevant budget if it completes in exactly 60 minutes. Jobs requesting less than a full node will be charged pro-rata according to -the number of CPUs (cores) requested and time taken. Accounting takes +the number of cores requested and time taken. Accounting takes place at job completion. Applications for access to the service should make estimates of computational @@ -73,5 +74,6 @@ Please use the following phrase to acknowledge Cirrus in all research outputs that have used the facility: *This work used the Cirrus UK National Tier-2 HPC Service at EPCC -(http://www.cirrus.ac.uk) funded by the University of Edinburgh and +(http://www.cirrus.ac.uk) funded by The University of Edinburgh, +the Edinburgh and South East Scotland City Region Deal, and UKRI via EPSRC.*