diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 8aed6d94c4cd0..6f1f70696000a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -327,6 +327,52 @@ Description: Energy performance preference This file is only present if the cppc-cpufreq driver is in use. +What: /sys/devices/system/cpu/cpuX/cpufreq/min_perf +Date: December 2025 +Contact: linux-pm@vger.kernel.org +Description: Minimum Performance Frequency + + Read/write a frequency value in kHz from/to this file. This + file conveys the minimum performance level (as frequency) at + which the platform may run. The frequency value is internally + converted to a performance value and must correspond to a + performance level in the range [Lowest Performance, Highest + Performance], inclusive. The minimum must be less than or equal + to the maximum performance. The performance range can be checked + from nodes: + /sys/devices/system/cpu/cpuX/acpi_cppc/highest_perf + /sys/devices/system/cpu/cpuX/acpi_cppc/lowest_perf + + This file is only present if the cppc-cpufreq driver is in use. + +What: /sys/devices/system/cpu/cpuX/cpufreq/max_perf +Date: December 2025 +Contact: linux-pm@vger.kernel.org +Description: Maximum Performance Frequency + + Read/write a frequency value in kHz from/to this file. This + file conveys the maximum performance level (as frequency) at + which the platform may run. The frequency value is internally + converted to a performance value and must correspond to a + performance level in the range [Lowest Performance, Highest + Performance], inclusive. The performance range can be checked + from nodes: + /sys/devices/system/cpu/cpuX/acpi_cppc/highest_perf + /sys/devices/system/cpu/cpuX/acpi_cppc/lowest_perf + + This file is only present if the cppc-cpufreq driver is in use. + +What: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited +Date: December 2025 +Contact: linux-pm@vger.kernel.org +Description: Performance Limited + + Read/write a 32-bit value from/to this file. This file indicates + to OSPM that an unpredictable event has limited processor + performance, and the delivered performance may be less than + desired/minimum performance. + + This file is only present if the cppc-cpufreq driver is in use. What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} Date: August 2008 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index aeea7e9288376..44a70e1ab59a2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -911,6 +911,18 @@ Format: ,,,[,] + cppc_cpufreq.auto_sel_mode= + [CPU_FREQ] Enable ACPI CPPC autonomous performance selection. + When enabled, hardware automatically adjusts CPU frequency + on all CPUs based on workload demands. In Autonomous mode, + Energy Performance Preference (EPP) hints guide hardware + toward performance (0x0) or energy efficiency (0xff). + Requires ACPI CPPC autonomous selection register support. + Format: + Default: 0 (disabled) + 0: use cpufreq governors + 1: enable if supported by hardware + cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system @@ -6161,7 +6173,7 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, - mba, smba, bmec. + mba, smba, bmec, abmc. E.g. 
to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index a7ec57060f64f..3667650036fba 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -213,6 +213,9 @@ stable kernels. | ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | CMN-650 | #3642720 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 | @@ -246,6 +249,12 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-1 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-4 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-6 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml b/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml new file mode 100644 index 0000000000000..53a6fdbbf05fe --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,mpam-msc.yaml @@ -0,0 +1,199 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,mpam-msc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm Memory System Resource Partitioning and Monitoring (MPAM) + +description: | + The Arm MPAM specification can be found here: + + https://developer.arm.com/documentation/ddi0598/latest + +maintainers: + - Rob Herring + +properties: + compatible: + items: + - const: arm,mpam-msc # Further details are discoverable + - const: arm,mpam-memory-controller-msc + + reg: + maxItems: 1 + description: A memory region containing registers as defined in the MPAM + specification. + + interrupts: + minItems: 1 + items: + - description: error (optional) + - description: overflow (optional, only for monitoring) + + interrupt-names: + oneOf: + - items: + - enum: [ error, overflow ] + - items: + - const: error + - const: overflow + + arm,not-ready-us: + description: The maximum time in microseconds for monitoring data to be + accurate after a settings change. For more information, see the + Not-Ready (NRDY) bit description in the MPAM specification. + + numa-node-id: true # see NUMA binding + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + '^ris@[0-9a-f]+$': + type: object + additionalProperties: false + description: + RIS nodes for each resource instance in an MSC. 
These nodes are required + for each resource instance implementing known MPAM controls + + properties: + compatible: + enum: + - arm,mpam-cache + # Memory bandwidth + - arm,mpam-memory + + reg: + minimum: 0 + maximum: 0xf + + cpus: + description: + Phandle(s) to the CPU node(s) this RIS belongs to. By default, the parent + device's affinity is used. + + arm,mpam-device: + $ref: /schemas/types.yaml#/definitions/phandle + description: + By default, the MPAM enabled device associated with a RIS is the MSC's + parent node. It is possible for each RIS to be associated with different + devices in which case 'arm,mpam-device' should be used. + + required: + - compatible + - reg + +required: + - compatible + - reg + +dependencies: + interrupts: [ interrupt-names ] + +additionalProperties: false + +examples: + - | + L3: cache-controller@30000000 { + compatible = "arm,dsu-l3-cache", "cache"; + cache-level = <3>; + cache-unified; + + ranges = <0x0 0x30000000 0x800000>; + #address-cells = <1>; + #size-cells = <1>; + + msc@10000 { + compatible = "arm,mpam-msc"; + + reg = <0x10000 0x2000>; + interrupts = <1>, <2>; + interrupt-names = "error", "overflow"; + arm,not-ready-us = <1>; + /* CPU affinity implied by parent cache node */ + }; + }; + + mem: memory-controller@20000 { + compatible = "foo,a-memory-controller"; + reg = <0x20000 0x1000>; + + #address-cells = <1>; + #size-cells = <1>; + ranges; + + msc@21000 { + compatible = "arm,mpam-memory-controller-msc", "arm,mpam-msc"; + reg = <0x21000 0x1000>; + interrupts = <3>; + interrupt-names = "error"; + arm,not-ready-us = <1>; + numa-node-id = <1>; + }; + }; + + iommu@40000 { + reg = <0x40000 0x1000>; + + ranges; + #address-cells = <1>; + #size-cells = <1>; + + msc@41000 { + compatible = "arm,mpam-msc"; + reg = <0 0x1000>; + interrupts = <5>, <6>; + interrupt-names = "error", "overflow"; + arm,not-ready-us = <1>; + + #address-cells = <1>; + #size-cells = <0>; + + ris@2 { + compatible = "arm,mpam-cache"; + reg = <0>; + // TODO: How to map to device(s)? + }; + }; + }; + + msc@80000 { + compatible = "foo,a-standalone-msc"; + reg = <0x80000 0x1000>; + + clocks = <&clks 123>; + + ranges; + #address-cells = <1>; + #size-cells = <1>; + + msc@10000 { + compatible = "arm,mpam-msc"; + + reg = <0x10000 0x2000>; + interrupts = <7>; + interrupt-names = "overflow"; + arm,not-ready-us = <1>; + + #address-cells = <1>; + #size-cells = <0>; + + ris@0 { + compatible = "arm,mpam-cache"; + reg = <0>; + arm,mpam-device = <&L2_0>; + }; + + ris@1 { + compatible = "arm,mpam-memory"; + reg = <1>; + arm,mpam-device = <&mem>; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml index e25fa72fd7857..7fa0cf490f209 100644 --- a/Documentation/devicetree/bindings/i3c/i3c.yaml +++ b/Documentation/devicetree/bindings/i3c/i3c.yaml @@ -24,15 +24,14 @@ properties: description: | Each I2C device connected to the bus should be described in a subnode. - All I3C devices are supposed to support DAA (Dynamic Address Assignment), - and are thus discoverable. So, by default, I3C devices do not have to be - described in the device tree. This being said, one might want to attach - extra resources to these devices, and those resources may have to be - described in the device tree, which in turn means we have to describe - I3C devices. 
- - Another use case for describing an I3C device in the device tree is when - this I3C device has a static I2C address and we want to assign it a + By default, I3C devices do not have to be described in the device tree. + This being said, one might want to attach extra resources to these + devices, and those resources may have to be described in the device tree, + which in turn means we have to describe I3C devices. + + I3C child would have to be described in the device tree if the I3C device + uses SETAASA for its discovery and needs to be assigned a static + address, or if it uses a static I2C address and we want to assign a specific I3C dynamic address before the DAA takes place (so that other devices on the bus can't take this dynamic address). @@ -147,6 +146,17 @@ patternProperties: through SETDASA. If static address is not present, this address is assigned through SETNEWDA after assigning a temporary address via ENTDAA. + mipi-i3c-static-method: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 1, 2 ] + description: | + Bitmap (Bit(0) = ENTDAA, Bit(1) = SETAASA) that indicates the static + address method used for the device discovery. This property is mandatory + for I3C devices that require to use SETAASA instead of ENTDAA to assign + a static address. The static address will be the one encoded in reg[0] + if SETAASA is used. ENTDAA will remain as the default method even if + this property is not present. + required: - reg diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index c7949dd44f2f3..b9f6aa44fc4d7 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -26,6 +26,7 @@ MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" MBA (Memory Bandwidth Allocation) "mba" SMBA (Slow Memory Bandwidth Allocation) "" BMEC (Bandwidth Monitoring Event Configuration) "" +ABMC (Assignable Bandwidth Monitoring Counters) "" =============================================== ================================ Historically, new features were made visible by default in /proc/cpuinfo. This @@ -143,12 +144,11 @@ with respect to allocation: user can request. "bandwidth_gran": - The granularity in which the memory bandwidth + The approximate granularity in which the memory bandwidth percentage is allocated. The allocated b/w percentage is rounded off to the next - control step available on the hardware. The - available bandwidth control steps are: - min_bandwidth + N * bandwidth_gran. + control step available on the hardware. The available + steps are at least as small as this value. "delay_linear": Indicates if the delay scale is linear or @@ -256,6 +256,144 @@ with the following files: # cat /sys/fs/resctrl/info/L3_MON/mbm_local_bytes_config 0=0x30;1=0x30;3=0x15;4=0x15 +"mbm_assign_mode": + The supported counter assignment modes. The enclosed brackets indicate which mode + is enabled. The MBM events associated with counters may reset when "mbm_assign_mode" + is changed. + :: + + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + [mbm_event] + default + + "mbm_event": + + mbm_event mode allows users to assign a hardware counter to an RMID, event + pair and monitor the bandwidth usage as long as it is assigned. The hardware + continues to track the assigned counter until it is explicitly unassigned by + the user. Each event within a resctrl group can be assigned independently. + + In this mode, a monitoring event can only accumulate data while it is backed + by a hardware counter. 
Use "mbm_L3_assignments" found in each CTRL_MON and MON + group to specify which of the events should have a counter assigned. The number + of counters available is described in the "num_mbm_cntrs" file. Changing the + mode may cause all counters on the resource to reset. + + Moving to mbm_event counter assignment mode requires users to assign the counters + to the events. Otherwise, the MBM event counters will return 'Unassigned' when read. + + The mode is beneficial for AMD platforms that support more CTRL_MON + and MON groups than available hardware counters. By default, this + feature is enabled on AMD platforms with the ABMC (Assignable Bandwidth + Monitoring Counters) capability, ensuring counters remain assigned even + when the corresponding RMID is not actively used by any processor. + + "default": + + In default mode, resctrl assumes there is a hardware counter for each + event within every CTRL_MON and MON group. On AMD platforms, it is + recommended to use the mbm_event mode, if supported, to prevent reset of MBM + events between reads resulting from hardware re-allocating counters. This can + result in misleading values or display "Unavailable" if no counter is assigned + to the event. + + * To enable "mbm_event" counter assignment mode: + :: + + # echo "mbm_event" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + + * To enable "default" monitoring mode: + :: + + # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + +"num_mbm_cntrs": + The maximum number of counters (total of available and assigned counters) in + each domain when the system supports mbm_event mode. + + For example, on a system with maximum of 32 memory bandwidth monitoring + counters in each of its L3 domains: + :: + + # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs + 0=32;1=32 + +"available_mbm_cntrs": + The number of counters available for assignment in each domain when mbm_event + mode is enabled on the system. + + For example, on a system with 30 available [hardware] assignable counters + in each of its L3 domains: + :: + + # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs + 0=30;1=30 + +"event_configs": + Directory that exists when "mbm_event" counter assignment mode is supported. + Contains a sub-directory for each MBM event that can be assigned to a counter. + + Two MBM events are supported by default: mbm_local_bytes and mbm_total_bytes. + Each MBM event's sub-directory contains a file named "event_filter" that is + used to view and modify which memory transactions the MBM event is configured + with. The file is accessible only when "mbm_event" counter assignment mode is + enabled. 
+ + List of memory transaction types supported: + + ========================== ======================================================== + Name Description + ========================== ======================================================== + dirty_victim_writes_all Dirty Victims from the QOS domain to all types of memory + remote_reads_slow_memory Reads to slow memory in the non-local NUMA domain + local_reads_slow_memory Reads to slow memory in the local NUMA domain + remote_non_temporal_writes Non-temporal writes to non-local NUMA domain + local_non_temporal_writes Non-temporal writes to local NUMA domain + remote_reads Reads to memory in the non-local NUMA domain + local_reads Reads to memory in the local NUMA domain + ========================== ======================================================== + + For example:: + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes, + local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory + + Modify the event configuration by writing to the "event_filter" file within + the "event_configs" directory. The read/write "event_filter" file contains the + configuration of the event that reflects which memory transactions are counted by it. + + For example:: + + # echo "local_reads, local_non_temporal_writes" > + /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,local_non_temporal_writes + +"mbm_assign_on_mkdir": + Exists when "mbm_event" counter assignment mode is supported. Accessible + only when "mbm_event" counter assignment mode is enabled. + + Determines if a counter will automatically be assigned to an RMID, MBM event + pair when its associated monitor group is created via mkdir. Enabled by default + on boot, also when switched from "default" mode to "mbm_event" counter assignment + mode. Users can disable this capability by writing to the interface. + + "0": + Auto assignment is disabled. + "1": + Auto assignment is enabled. + + Example:: + + # echo 0 > /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir + 0 + "max_threshold_occupancy": Read/write file provides the largest value (in bytes) at which a previously used LLC_occupancy @@ -380,10 +518,77 @@ When monitoring is enabled all MON groups will also contain: for the L3 cache they occupy). These are named "mon_sub_L3_YY" where "YY" is the node number. + When the 'mbm_event' counter assignment mode is enabled, reading + an MBM event of a MON group returns 'Unassigned' if no hardware + counter is assigned to it. For CTRL_MON groups, 'Unassigned' is + returned if the MBM event does not have an assigned counter in the + CTRL_MON group nor in any of its associated MON groups. + "mon_hw_id": Available only with debug option. The identifier used by hardware for the monitor group. On x86 this is the RMID. +When monitoring is enabled all MON groups may also contain: + +"mbm_L3_assignments": + Exists when "mbm_event" counter assignment mode is supported and lists the + counter assignment states of the group. + + The assignment list is displayed in the following format: + + :=;= + + Event: A valid MBM event in the + /sys/fs/resctrl/info/L3_MON/event_configs directory. 
+ + Domain ID: A valid domain ID. When writing, '*' applies the changes + to all the domains. + + Assignment states: + + _ : No counter assigned. + + e : Counter assigned exclusively. + + Example: + + To display the counter assignment states for the default group. + :: + + # cd /sys/fs/resctrl + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + + Assignments can be modified by writing to the interface. + + Examples: + + To unassign the counter associated with the mbm_total_bytes event on domain 0: + :: + + # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=e + mbm_local_bytes:0=e;1=e + + To unassign the counter associated with the mbm_total_bytes event on all the domains: + :: + + # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=_ + mbm_local_bytes:0=e;1=e + + To assign a counter associated with the mbm_total_bytes event on all domains in + exclusive mode: + :: + + # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + When the "mba_MBps" mount option is used all CTRL_MON groups will also contain: "mba_MBps_event": @@ -1429,6 +1634,125 @@ View the llc occupancy snapshot:: # cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy 11234000 + +Examples on working with mbm_assign_mode +======================================== + +a. Check if MBM counter assignment mode is supported. +:: + + # mount -t resctrl resctrl /sys/fs/resctrl/ + + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + [mbm_event] + default + +The "mbm_event" mode is detected and enabled. + +b. Check how many assignable counters are supported. +:: + + # cat /sys/fs/resctrl/info/L3_MON/num_mbm_cntrs + 0=32;1=32 + +c. Check how many assignable counters are available for assignment in each domain. +:: + + # cat /sys/fs/resctrl/info/L3_MON/available_mbm_cntrs + 0=30;1=30 + +d. To list the default group's assign states. +:: + + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + +e. To unassign the counter associated with the mbm_total_bytes event on domain 0. +:: + + # echo "mbm_total_bytes:0=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=_;1=e + mbm_local_bytes:0=e;1=e + +f. To unassign the counter associated with the mbm_total_bytes event on all domains. +:: + + # echo "mbm_total_bytes:*=_" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignment + mbm_total_bytes:0=_;1=_ + mbm_local_bytes:0=e;1=e + +g. To assign a counter associated with the mbm_total_bytes event on all domains in +exclusive mode. +:: + + # echo "mbm_total_bytes:*=e" > /sys/fs/resctrl/mbm_L3_assignments + # cat /sys/fs/resctrl/mbm_L3_assignments + mbm_total_bytes:0=e;1=e + mbm_local_bytes:0=e;1=e + +h. Read the events mbm_total_bytes and mbm_local_bytes of the default group. There is +no change in reading the events with the assignment. +:: + + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_total_bytes + 779247936 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_total_bytes + 562324232 + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + 212122123 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + 121212144 + +i. Check the event configurations. 
+:: + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_total_bytes/event_filter + local_reads,remote_reads,local_non_temporal_writes,remote_non_temporal_writes, + local_reads_slow_memory,remote_reads_slow_memory,dirty_victim_writes_all + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory + +j. Change the event configuration for mbm_local_bytes. +:: + + # echo "local_reads, local_non_temporal_writes, local_reads_slow_memory, remote_reads" > + /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + + # cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter + local_reads,local_non_temporal_writes,local_reads_slow_memory,remote_reads + +k. Now read the local events again. The first read may come back with "Unavailable" +status. The subsequent read of mbm_local_bytes will display the current value. +:: + + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + Unavailable + # cat /sys/fs/resctrl/mon_data/mon_L3_00/mbm_local_bytes + 2252323 + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + Unavailable + # cat /sys/fs/resctrl/mon_data/mon_L3_01/mbm_local_bytes + 1566565 + +l. Users have the option to go back to 'default' mbm_assign_mode if required. This can be +done using the following command. Note that switching the mbm_assign_mode may reset all +the MBM counters (and thus all MBM events) of all the resctrl groups. +:: + + # echo "default" > /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + # cat /sys/fs/resctrl/info/L3_MON/mbm_assign_mode + mbm_event + [default] + +m. Unmount the resctrl filesystem. +:: + + # umount /sys/fs/resctrl/ + Intel RDT Errata ================ diff --git a/MAINTAINERS b/MAINTAINERS index 263288f73850d..85bd4a19a6c42 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21216,6 +21216,7 @@ M: Tony Luck M: Reinette Chatre R: Dave Martin R: James Morse +R: Babu Moger L: linux-kernel@vger.kernel.org S: Supported F: Documentation/filesystems/resctrl.rst diff --git a/Ubuntu.md b/Ubuntu.md index e00284995816a..cc91244731ecb 100644 --- a/Ubuntu.md +++ b/Ubuntu.md @@ -2,7 +2,7 @@ Name: linux-nvidia-6.17 Version: 6.17.0 Series: 24.04 (noble) Description: - This is the source code for the Ubuntu linux-nvidia-6.17 kernel for the -Noble series. This source tree is used to produce the flavours: nvidia, -nvidia-64k. This kernel is configured to support the NVIDIA x86 and arm64 -platforms. + This is the source code for the Ubuntu linux kernel for the 24.04 series. This + source tree is used to produce the flavours: nvidia, nvidia-64k. + This kernel is configured to support the widest range of desktop, laptop and + server configurations. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c61d85dfa07c4..da0aaf0d5a635 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2061,6 +2061,33 @@ config ARM64_TLB_RANGE ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a range of input addresses. +config ARM64_MPAM + bool "Enable support for MPAM" + select ARM64_MPAM_DRIVER + select ARCH_HAS_CPU_RESCTRL + help + Memory System Resource Partitioning and Monitoring (MPAM) is an + optional extension to the Arm architecture that allows each + transaction issued to the memory system to be labelled with a + Partition identifier (PARTID) and Performance Monitoring Group + identifier (PMG). 
+ + Memory system components, such as the caches, can be configured with + policies to control how much of various physical resources (such as + memory bandwidth or cache memory) the transactions labelled with each + PARTID can consume. Depending on the capabilities of the hardware, + the PARTID and PMG can also be used as filtering criteria to measure + the memory system resource consumption of different parts of a + workload. + + Use of this extension requires CPU support, support in the + Memory System Components (MSC), and a description from firmware + of where the MSCs are in the address space. + + MPAM is exposed to user-space via the resctrl pseudo filesystem. + + This option enables the extra context switch code. + endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d8c720c881f4..279db0f8965c0 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -555,6 +555,8 @@ CONFIG_I2C_TEGRA=y CONFIG_I2C_UNIPHIER_F=y CONFIG_I2C_RCAR=y CONFIG_I2C_CROS_EC_TUNNEL=y +CONFIG_I3C=m +CONFIG_DW_I3C_MASTER=m CONFIG_SPI=y CONFIG_SPI_ARMADA_3700=y CONFIG_SPI_BCM2835=m @@ -713,6 +715,7 @@ CONFIG_SENSORS_RASPBERRYPI_HWMON=m CONFIG_SENSORS_SL28CPLD=m CONFIG_SENSORS_INA2XX=m CONFIG_SENSORS_INA3221=m +CONFIG_SENSORS_SPD5118=m CONFIG_SENSORS_TMP102=m CONFIG_MISC_RP1=m CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h new file mode 100644 index 0000000000000..05bc63fbdd4bf --- /dev/null +++ b/arch/arm64/include/asm/mpam.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Arm Ltd. */ + +#ifndef __ASM__MPAM_H +#define __ASM__MPAM_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +DECLARE_STATIC_KEY_FALSE(mpam_enabled); +DECLARE_PER_CPU(u64, arm64_mpam_default); +DECLARE_PER_CPU(u64, arm64_mpam_current); + +/* + * The value of the MPAM0_EL1 sysreg when a task is in the default group. + * This is used by the context switch code to use the resctrl CPU property + * instead. The value is modified when CDP is enabled/disabled by mounting + * the resctrl filesystem. + */ +extern u64 arm64_mpam_global_default; + +/* + * The resctrl filesystem writes to the partid/pmg values for threads and CPUs, + * which may race with reads in __mpam_sched_in(). Ensure only one of the old + * or new values are used. Particular care should be taken with the pmg field + * as __mpam_sched_in() may read a partid and pmg that don't match, causing + * this value to be stored with cache allocations, despite being considered + * 'free' by resctrl. + * + * A value in struct thread_info is used instead of struct task_struct as the + * cpu's u64 register format is used, but struct task_struct has two u32'. 
+ */ +static inline void mpam_set_cpu_defaults(int cpu, u16 partid_d, u16 partid_i, + u8 pmg_d, u8 pmg_i) +{ + u64 default_val; + + default_val = FIELD_PREP(MPAM0_EL1_PARTID_D, partid_d); + default_val |= FIELD_PREP(MPAM0_EL1_PARTID_I, partid_i); + default_val |= FIELD_PREP(MPAM0_EL1_PMG_D, pmg_d); + default_val |= FIELD_PREP(MPAM0_EL1_PMG_I, pmg_i); + + WRITE_ONCE(per_cpu(arm64_mpam_default, cpu), default_val); +} + +static inline void mpam_set_task_partid_pmg(struct task_struct *tsk, + u16 partid_d, u16 partid_i, + u8 pmg_d, u8 pmg_i) +{ +#ifdef CONFIG_ARM64_MPAM + u64 regval; + + regval = FIELD_PREP(MPAM0_EL1_PARTID_D, partid_d); + regval |= FIELD_PREP(MPAM0_EL1_PARTID_I, partid_i); + regval |= FIELD_PREP(MPAM0_EL1_PMG_D, pmg_d); + regval |= FIELD_PREP(MPAM0_EL1_PMG_I, pmg_i); + + WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval); +#endif +} + +static inline u64 mpam_get_regval(struct task_struct *tsk) +{ +#ifdef CONFIG_ARM64_MPAM + return READ_ONCE(task_thread_info(tsk)->mpam_partid_pmg); +#else + return 0; +#endif +} + +static inline void mpam_thread_switch(struct task_struct *tsk) +{ + u64 oldregval; + int cpu = smp_processor_id(); + u64 regval = mpam_get_regval(tsk); + + if (!IS_ENABLED(CONFIG_ARM64_MPAM) || + !static_branch_likely(&mpam_enabled)) + return; + + if (regval == READ_ONCE(arm64_mpam_global_default)) + regval = READ_ONCE(per_cpu(arm64_mpam_default, cpu)); + + oldregval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + if (oldregval == regval) + return; + + /* Synchronising this write is left until the ERET to EL0 */ + write_sysreg_s(regval, SYS_MPAM0_EL1); + WRITE_ONCE(per_cpu(arm64_mpam_current, cpu), regval); +} +#endif /* __ASM__MPAM_H */ diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h new file mode 100644 index 0000000000000..b506e95cf6e37 --- /dev/null +++ b/arch/arm64/include/asm/resctrl.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f241b8601ebd9..c226dabd50191 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,6 +41,9 @@ struct thread_info { #ifdef CONFIG_SHADOW_CALL_STACK void *scs_base; void *scs_sp; +#endif +#ifdef CONFIG_ARM64_MPAM + u64 mpam_partid_pmg; #endif u32 cpu; }; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 76f32e424065e..15979f3665196 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o +obj-$(CONFIG_ARM64_MPAM) += mpam.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c3ef9b161b812..ae1d1a8cf6ddc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -2527,6 +2528,12 @@ test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope) static void cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) { + int cpu = smp_processor_id(); + u64 regval = 0; + + if (IS_ENABLED(CONFIG_MPAM)) + regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + /* * Access by the kernel (at EL1) should use the reserved PARTID * which is configured unrestricted. 
This avoids priority-inversion @@ -2534,6 +2541,8 @@ cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) * been throttled to release the lock. */ write_sysreg_s(0, SYS_MPAM1_EL1); + + write_sysreg_s(regval, SYS_MPAM0_EL1); } static bool diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c new file mode 100644 index 0000000000000..e5cc0d2e0106b --- /dev/null +++ b/arch/arm64/kernel/mpam.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Arm Ltd. */ + +#include + +#include +#include +#include +#include + +DEFINE_STATIC_KEY_FALSE(mpam_enabled); +DEFINE_PER_CPU(u64, arm64_mpam_default); +DEFINE_PER_CPU(u64, arm64_mpam_current); + +u64 arm64_mpam_global_default; + +static int mpam_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + u64 regval; + int cpu = smp_processor_id(); + + switch (cmd) { + case CPU_PM_EXIT: + /* + * Don't use mpam_thread_switch() as the system register + * value has changed under our feet. + */ + regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + write_sysreg_s(0, SYS_MPAM1_EL1); + write_sysreg_s(regval, SYS_MPAM0_EL1); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block mpam_pm_nb = { + .notifier_call = mpam_pm_notifier, +}; + +static int __init arm64_mpam_register_cpus(void) +{ + u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + u16 partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, mpamidr); + u8 pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, mpamidr); + + cpu_pm_register_notifier(&mpam_pm_nb); + return mpam_register_requestor(partid_max, pmg_max); +} +/* Must occur before mpam_msc_driver_init() from subsys_initcall() */ +arch_initcall(arm64_mpam_register_cpus) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index fba7ca102a8c4..b510c0699313b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -737,6 +738,12 @@ struct task_struct *__switch_to(struct task_struct *prev, if (prev->thread.sctlr_user != next->thread.sctlr_user) update_sctlr_el1(next->thread.sctlr_user); + /* + * MPAM thread switch happens after the DSB to ensure prev's accesses + * use prev's MPAM settings. 
+ */ + mpam_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 751ca35386b0e..b2a562217d3ff 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -496,6 +496,7 @@ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ /* * BUG word(s) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 20fa4a79df137..97420e1c92e1a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1224,6 +1224,8 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff #define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* AMD-V MSRs */ diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index feb93b50e990a..279aba8e97bf5 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -44,7 +44,6 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state); extern bool rdt_alloc_capable; extern bool rdt_mon_capable; -extern unsigned int rdt_mon_features; DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); @@ -84,21 +83,6 @@ static inline void resctrl_arch_disable_mon(void) static_branch_dec_cpuslocked(&rdt_enable_key); } -static inline bool resctrl_arch_is_llc_occupancy_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_total_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); -} - -static inline bool resctrl_arch_is_mbm_local_enabled(void) -{ - return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); -} - /* * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR * @@ -207,8 +191,22 @@ static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx) { } +static inline bool resctrl_arch_mon_can_overflow(void) +{ + return true; +} + void resctrl_cpu_detect(struct cpuinfo_x86 *c); +static inline bool resctrl_arch_get_mb_uses_numa_nid(void) +{ + return false; +} + +static inline int resctrl_arch_set_mb_uses_numa_nid(bool enabled) +{ + return -EOPNOTSUPP; +} #else static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 187d527ef73b6..42fcc9d7ff7a2 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -88,7 +88,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "MB", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA), - .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, [RDT_RESOURCE_SMBA] = @@ -97,7 +96,6 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .name = "SMBA", .ctrl_scope = RESCTRL_L3_CACHE, .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), - .schema_fmt = RESCTRL_SCHEMA_RANGE, }, }, }; @@ -107,7 +105,7 @@ u32 resctrl_arch_system_num_rmid_idx(void) struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; /* 
RMID are independent numbers for x86. num_rmid_idx == num_rmid */ - return r->num_rmid; + return r->mon.num_rmid; } struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) @@ -192,22 +190,23 @@ static __init bool __get_mem_config_intel(struct rdt_resource *r) cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); hw_res->num_closid = edx.split.cos_max + 1; max_delay = eax.split.max_delay + 1; + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; r->membw.max_bw = MAX_MBA_BW; - r->membw.arch_needs_linear = true; + r->mba.arch_needs_linear = true; if (ecx & MBA_IS_LINEAR) { - r->membw.delay_linear = true; + r->mba.delay_linear = true; r->membw.min_bw = MAX_MBA_BW - max_delay; r->membw.bw_gran = MAX_MBA_BW - max_delay; } else { if (!rdt_get_mb_table(r)) return false; - r->membw.arch_needs_linear = false; + r->mba.arch_needs_linear = false; } if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA)) - r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD; + r->mba.throttle_mode = THREAD_THROTTLE_PER_THREAD; else - r->membw.throttle_mode = THREAD_THROTTLE_MAX; + r->mba.throttle_mode = THREAD_THROTTLE_MAX; r->alloc_capable = true; @@ -227,17 +226,18 @@ static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx); hw_res->num_closid = edx + 1; + r->schema_fmt = RESCTRL_SCHEMA__AMD_MBA; r->membw.max_bw = 1 << eax; /* AMD does not use delay */ - r->membw.delay_linear = false; - r->membw.arch_needs_linear = false; + r->mba.delay_linear = false; + r->mba.arch_needs_linear = false; /* * AMD does not use memory delay throttle model to control * the allocation like Intel does. */ - r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; + r->mba.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = 0; r->membw.bw_gran = 1; @@ -301,7 +301,7 @@ static void mba_wrmsr_amd(struct msr_param *m) */ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { - if (r->membw.delay_linear) + if (r->mba.delay_linear) return MAX_MBA_BW - bw; pr_warn_once("Non Linear delay-bw map not supported but queried\n"); @@ -354,7 +354,7 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc) * For Memory Allocation: Set b/w requested to 100% */ for (i = 0; i < hw_res->num_closid; i++, dc++) - *dc = resctrl_get_default_ctrl(r); + *dc = resctrl_get_resource_default_ctrl(r); } static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) @@ -365,8 +365,10 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) { - kfree(hw_dom->arch_mbm_total); - kfree(hw_dom->arch_mbm_local); + int idx; + + for_each_mbm_idx(idx) + kfree(hw_dom->arch_mbm_states[idx]); kfree(hw_dom); } @@ -400,25 +402,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain * */ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) { - size_t tsize; - - if (resctrl_arch_is_mbm_total_enabled()) { - tsize = sizeof(*hw_dom->arch_mbm_total); - hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); - if (!hw_dom->arch_mbm_total) - return -ENOMEM; - } - if (resctrl_arch_is_mbm_local_enabled()) { - tsize = sizeof(*hw_dom->arch_mbm_local); - hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); - if (!hw_dom->arch_mbm_local) { - kfree(hw_dom->arch_mbm_total); - hw_dom->arch_mbm_total = NULL; - return -ENOMEM; - } + size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]); + enum resctrl_event_id eventid; + int idx; + + 
for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL); + if (!hw_dom->arch_mbm_states[idx]) + goto cleanup; } return 0; +cleanup: + for_each_mbm_idx(idx) { + kfree(hw_dom->arch_mbm_states[idx]); + hw_dom->arch_mbm_states[idx] = NULL; + } + + return -ENOMEM; } static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) @@ -516,6 +520,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = container_of(hdr, struct rdt_mon_domain, hdr); cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + /* Update the mbm_assign_mode state for the CPU if supported */ + if (r->mon.mbm_cntr_assignable) + resctrl_arch_mbm_cntr_assign_set_one(r); return; } @@ -535,9 +542,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + /* Update the mbm_assign_mode state for the CPU if supported */ + if (r->mon.mbm_cntr_assignable) + resctrl_arch_mbm_cntr_assign_set_one(r); + arch_mon_domain_online(r, d); - if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) { + if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { mon_domain_free(hw_dom); return; } @@ -707,6 +718,7 @@ enum { RDT_FLAG_MBA, RDT_FLAG_SMBA, RDT_FLAG_BMEC, + RDT_FLAG_ABMC, }; #define RDT_OPT(idx, n, f) \ @@ -732,6 +744,7 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), + RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -863,15 +876,24 @@ static __init bool get_rdt_alloc_resources(void) static __init bool get_rdt_mon_resources(void) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + bool ret = false; - if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) - rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) - rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); - if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) - rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); + if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID); + ret = true; + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); + ret = true; + } + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + ret = true; + } + if (rdt_cpu_has(X86_FEATURE_ABMC)) + ret = true; - if (!rdt_mon_features) + if (!ret) return false; return !rdt_get_mon_l3_config(r); @@ -965,7 +987,7 @@ static enum cpuhp_state rdt_online; /* Runs once on the BSP during boot. 
*/ void resctrl_cpu_detect(struct cpuinfo_x86 *c) { - if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + if (!cpu_has(c, X86_FEATURE_CQM_LLC) && !cpu_has(c, X86_FEATURE_ABMC)) { c->x86_cache_max_rmid = -1; c->x86_cache_occ_scale = -1; c->x86_cache_mbm_width_offset = -1; @@ -977,7 +999,8 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || - cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL) || + cpu_has(c, X86_FEATURE_ABMC)) { u32 eax, ebx, ecx, edx; /* QoS sub-leaf, EAX=0Fh, ECX=1 */ diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1189c0df4ad76..a7828c31c118c 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -16,9 +16,15 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "internal.h" +u32 resctrl_arch_round_bw(u32 val, const struct resctrl_schema *s) +{ + return roundup(val, (unsigned long)s->membw.bw_gran); +} + int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 5e3c41b364373..9f4c2f0aaf5c8 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -37,6 +37,15 @@ struct arch_mbm_state { u64 prev_msr; }; +/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */ +#define ABMC_ENABLE_BIT 0 + +/* + * Qos Event Identifiers. + */ +#define ABMC_EXTENDED_EVT_ID BIT(31) +#define ABMC_EVT_ID BIT(0) + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function @@ -54,15 +63,15 @@ struct rdt_hw_ctrl_domain { * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share * a resource for a monitor function * @d_resctrl: Properties exposed to the resctrl file system - * @arch_mbm_total: arch private state for MBM total bandwidth - * @arch_mbm_local: arch private state for MBM local bandwidth + * @arch_mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct arch_mbm_state + * indexed by RMID on x86. * * Members of this structure are accessed via helpers that provide abstraction. */ struct rdt_hw_mon_domain { struct rdt_mon_domain d_resctrl; - struct arch_mbm_state *arch_mbm_total; - struct arch_mbm_state *arch_mbm_local; + struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; }; static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) @@ -102,6 +111,7 @@ struct msr_param { * @mon_scale: cqm counter * mon_scale = occupancy in bytes * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource + * @mbm_cntr_assign_enabled: ABMC feature is enabled * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -115,6 +125,7 @@ struct rdt_hw_resource { unsigned int mon_scale; unsigned int mbm_width; bool cdp_enabled; + bool mbm_cntr_assign_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) @@ -159,6 +170,42 @@ union cpuid_0x10_x_edx { unsigned int full; }; +/* + * ABMC counters are configured by writing to MSR_IA32_L3_QOS_ABMC_CFG. 
+ * + * @bw_type : Event configuration that represents the memory + * transactions being tracked by the @cntr_id. + * @bw_src : Bandwidth source (RMID or CLOSID). + * @reserved1 : Reserved. + * @is_clos : @bw_src field is a CLOSID (not an RMID). + * @cntr_id : Counter identifier. + * @reserved : Reserved. + * @cntr_en : Counting enable bit. + * @cfg_en : Configuration enable bit. + * + * Configuration and counting: + * Counter can be configured across multiple writes to MSR. Configuration + * is applied only when @cfg_en = 1. Counter @cntr_id is reset when the + * configuration is applied. + * @cfg_en = 1, @cntr_en = 0 : Apply @cntr_id configuration but do not + * count events. + * @cfg_en = 1, @cntr_en = 1 : Apply @cntr_id configuration and start + * counting events. + */ +union l3_qos_abmc_cfg { + struct { + unsigned long bw_type :32, + bw_src :12, + reserved1: 3, + is_clos : 1, + cntr_id : 5, + reserved : 9, + cntr_en : 1, + cfg_en : 1; + } split; + unsigned long full; +}; + void rdt_ctrl_update(void *arg); int rdt_get_mon_l3_config(struct rdt_resource *r); @@ -168,5 +215,6 @@ bool rdt_cpu_has(int flag); void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); +void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index eed0f8417b8c5..fe1a2aa53c16a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -31,11 +31,6 @@ */ bool rdt_mon_capable; -/* - * Global to indicate which monitoring events are enabled. - */ -unsigned int rdt_mon_features; - #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) static int snc_nodes_per_l3_cache = 1; @@ -135,7 +130,7 @@ static int logical_rmid_to_physical_rmid(int cpu, int lrmid) if (snc_nodes_per_l3_cache == 1) return lrmid; - return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid; + return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->mon.num_rmid; } static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) @@ -166,18 +161,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do u32 rmid, enum resctrl_event_id eventid) { - switch (eventid) { - case QOS_L3_OCCUP_EVENT_ID: - return NULL; - case QOS_L3_MBM_TOTAL_EVENT_ID: - return &hw_dom->arch_mbm_total[rmid]; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return &hw_dom->arch_mbm_local[rmid]; - default: - /* Never expect to get here */ - WARN_ON_ONCE(1); + struct arch_mbm_state *state; + + if (!resctrl_is_mbm_event(eventid)) return NULL; - } + + state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)]; + + return state ? 
&state[rmid] : NULL; } void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, @@ -206,14 +197,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) { struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - - if (resctrl_arch_is_mbm_total_enabled()) - memset(hw_dom->arch_mbm_total, 0, - sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); - - if (resctrl_arch_is_mbm_local_enabled()) - memset(hw_dom->arch_mbm_local, 0, - sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); + enum resctrl_event_id eventid; + int idx; + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + memset(hw_dom->arch_mbm_states[idx], 0, + sizeof(*hw_dom->arch_mbm_states[0]) * r->mon.num_rmid); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -272,6 +265,75 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, return ret; } +static int __cntr_id_read(u32 cntr_id, u64 *val) +{ + u64 msr_val; + + /* + * QM_EVTSEL Register definition: + * ======================================================= + * Bits Mnemonic Description + * ======================================================= + * 63:44 -- Reserved + * 43:32 RMID RMID or counter ID in ABMC mode + * when reading an MBM event + * 31 ExtendedEvtID Extended Event Identifier + * 30:8 -- Reserved + * 7:0 EvtID Event Identifier + * ======================================================= + * The contents of a specific counter can be read by setting the + * following fields in QM_EVTSEL.ExtendedEvtID(=1) and + * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting QM_EVTSEL.RMID + * to the desired counter ID. Reading the QM_CTR then returns the + * contents of the specified counter. The RMID_VAL_ERROR bit is set + * if the counter configuration is invalid, or if an invalid counter + * ID is set in the QM_EVTSEL.RMID field. The RMID_VAL_UNAVAIL bit + * is set if the counter data is unavailable. + */ + wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) +{ + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct arch_mbm_state *am; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) { + memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __cntr_id_read(cntr_id, &am->prev_msr); + } +} + +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + int ret; + + ret = __cntr_id_read(cntr_id, &msr_val); + if (ret) + return ret; + + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + + return 0; +} + /* * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1 * which indicates that RMIDs are configured in legacy mode. 
@@ -360,12 +422,13 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int threshold; + u32 eax, ebx, ecx, edx; snc_nodes_per_l3_cache = snc_get_config(); resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache; - r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; + r->mon.num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) @@ -380,7 +443,7 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - threshold = resctrl_rmid_realloc_limit / r->num_rmid; + threshold = resctrl_rmid_realloc_limit / r->mon.num_rmid; /* * Because num_rmid may not be a power of two, round the value @@ -389,12 +452,26 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) */ resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); - if (rdt_cpu_has(X86_FEATURE_BMEC)) { - u32 eax, ebx, ecx, edx; - + if (rdt_cpu_has(X86_FEATURE_BMEC) || rdt_cpu_has(X86_FEATURE_ABMC)) { /* Detect list of bandwidth sources that can be tracked */ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx); - r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; + } + + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { + r->mon.mbm_cntr_assignable = true; + cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); + r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; + hw_res->mbm_cntr_assign_enabled = true; } r->mon_capable = true; @@ -415,3 +492,91 @@ void __init intel_rdt_mbm_apply_quirk(void) mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold; mbm_cf = mbm_cf_table[cf_index].cf; } + +static void resctrl_abmc_set_one_amd(void *arg) +{ + bool *enable = arg; + + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, ABMC_ENABLE_BIT); +} + +/* + * ABMC enable/disable requires update of L3_QOS_EXT_CFG MSR on all the CPUs + * associated with all monitor domains. 
+ */
+static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
+{
+	struct rdt_mon_domain *d;
+
+	lockdep_assert_cpus_held();
+
+	list_for_each_entry(d, &r->mon_domains, hdr.list) {
+		on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_abmc_set_one_amd,
+				 &enable, 1);
+		resctrl_arch_reset_rmid_all(r, d);
+	}
+}
+
+int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+	if (r->mon.mbm_cntr_assignable &&
+	    hw_res->mbm_cntr_assign_enabled != enable) {
+		_resctrl_abmc_enable(r, enable);
+		hw_res->mbm_cntr_assign_enabled = enable;
+	}
+
+	return 0;
+}
+
+bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r)
+{
+	return resctrl_to_arch_res(r)->mbm_cntr_assign_enabled;
+}
+
+static void resctrl_abmc_config_one_amd(void *info)
+{
+	union l3_qos_abmc_cfg *abmc_cfg = info;
+
+	wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, abmc_cfg->full);
+}
+
+/*
+ * Send an IPI to the domain to assign the counter to RMID, event pair.
+ */
+void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
+			      enum resctrl_event_id evtid, u32 rmid, u32 closid,
+			      u32 cntr_id, bool assign)
+{
+	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+	union l3_qos_abmc_cfg abmc_cfg = { 0 };
+	struct arch_mbm_state *am;
+
+	abmc_cfg.split.cfg_en = 1;
+	abmc_cfg.split.cntr_en = assign ? 1 : 0;
+	abmc_cfg.split.cntr_id = cntr_id;
+	abmc_cfg.split.bw_src = rmid;
+	if (assign)
+		abmc_cfg.split.bw_type = resctrl_get_mon_evt_cfg(evtid);
+
+	smp_call_function_any(&d->hdr.cpu_mask, resctrl_abmc_config_one_amd, &abmc_cfg, 1);
+
+	/*
+	 * The hardware counter is reset (because cfg_en == 1) so there is no
+	 * need to record initial non-zero counts.
+	 */
+	am = get_arch_mbm_state(hw_dom, rmid, evtid);
+	if (am)
+		memset(am, 0, sizeof(*am));
+}
+
+void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r)
+{
+	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
+
+	resctrl_abmc_set_one_amd(&hw_res->mbm_cntr_assign_enabled);
+}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8850264684405..8a017f1111028 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -253,7 +253,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r)
 		hw_dom = resctrl_to_arch_ctrl_dom(d);
 
 		for (i = 0; i < hw_res->num_closid; i++)
-			hw_dom->ctrl_val[i] = resctrl_get_default_ctrl(r);
+			hw_dom->ctrl_val[i] = resctrl_get_resource_default_ctrl(r);
 		msr_param.dom = d;
 		smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
 	}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 6b868afb26c31..4cee6213d6673 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -51,6 +51,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
 	{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
 	{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
+	{ X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 },
 	{ X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
 	{ X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
 	{ X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 },
diff --git a/debian.master/rules.d/arm64.mk b/debian.master/rules.d/arm64.mk
index ad6fbd06ab703..702e011fe5631 100644
--- a/debian.master/rules.d/arm64.mk
+++ b/debian.master/rules.d/arm64.mk
@@ -1,8 +1,8 @@
 build_arch = arm64
 defconfig = defconfig
 flavours = generic
generic-64k -build_image = vmlinuz.efi -kernel_file = arch/$(build_arch)/boot/vmlinuz.efi +build_image = Image.gz +kernel_file = arch/$(build_arch)/boot/Image.gz install_file = vmlinuz no_dumpfile = true uefi_signed = true diff --git a/debian.nvidia-6.17/changelog b/debian.nvidia-6.17/changelog index c03962f98acd6..0e7f10d1594ec 100644 --- a/debian.nvidia-6.17/changelog +++ b/debian.nvidia-6.17/changelog @@ -1,10 +1,3590 @@ -linux-nvidia-6.17 (6.17.0-1004.4) UNRELEASED; urgency=medium +linux-nvidia-6.17 (6.17.0-1008.8) noble; urgency=medium - CHANGELOG: Do not edit directly. Autogenerated at release. - CHANGELOG: Use the printchanges target to see the current changes. - CHANGELOG: Use the insertchanges target to create the final log. + * noble/linux-nvidia-6.17: 6.17.0-1008.8 -proposed tracker (LP: #2138765) - -- Jacob Martin Fri, 14 Nov 2025 21:37:58 -0600 + * mt7925: Incorrect MLO mode in firmware control (LP: #2138755) + - NVIDIA: SAUCE: wifi: mt76: mt7925: Fix incorrect MLO mode in firmware + control + + -- Abdur Rahman Tue, 20 Jan 2026 16:54:12 -0500 + +linux-nvidia-6.17 (6.17.0-1007.7) noble; urgency=medium + + * noble/linux-nvidia-6.17: 6.17.0-1007.7 -proposed tracker (LP: #2137561) + + * Packaging resync (LP: #1786013) + - [Packaging] debian.nvidia-6.17/dkms-versions -- update from kernel- + versions (main/d2025.12.18) + + * Enable GDS in the 6.8 based linux-nvidia kernel (LP: #2059814) + - NVIDIA: [Config] Add nvidia-fs build dependencies + + * Add PCIe Hotplug Driver for CX7 on DGX Spark (LP: #2138269) + - NVIDIA: SAUCE: MEDIATEK: platform: Add PCIe Hotplug Driver for CX7 on + DGX Spark + + * Backport support for Grace MPAM (LP: #2122432) + - x86,fs/resctrl: Consolidate monitor event descriptions + - x86,fs/resctrl: Replace architecture event enabled checks + - x86/resctrl: Remove the rdt_mon_features global variable + - x86,fs/resctrl: Prepare for more monitor events + - x86/cpufeatures: Add support for Assignable Bandwidth Monitoring + Counters (ABMC) + - x86/resctrl: Add ABMC feature in the command line options + - x86,fs/resctrl: Consolidate monitoring related data from rdt_resource + - x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details + - x86/resctrl: Add support to enable/disable AMD ABMC feature + - fs/resctrl: Introduce the interface to display monitoring modes + - fs/resctrl: Add resctrl file to display number of assignable counters + - fs/resctrl: Introduce mbm_cntr_cfg to track assignable counters per + domain + - fs/resctrl: Introduce interface to display number of free MBM counters + - x86/resctrl: Add data structures and definitions for ABMC assignment + - fs/resctrl: Introduce event configuration field in struct mon_evt + - x86,fs/resctrl: Implement resctrl_arch_config_cntr() to assign a counter + with ABMC + - fs/resctrl: Add the functionality to assign MBM events + - fs/resctrl: Add the functionality to unassign MBM events + - fs/resctrl: Pass struct rdtgroup instead of individual members + - fs/resctrl: Introduce counter ID read, reset calls in mbm_event mode + - x86/resctrl: Implement resctrl_arch_reset_cntr() and + resctrl_arch_cntr_read() + - fs/resctrl: Support counter read/reset with mbm_event assignment mode + - fs/resctrl: Add event configuration directory under info/L3_MON/ + - fs/resctrl: Provide interface to update the event configurations + - fs/resctrl: Introduce mbm_assign_on_mkdir to enable assignments on mkdir + - fs/resctrl: Auto assign counters on mkdir and clean up on group removal + - fs/resctrl: Introduce 
mbm_L3_assignments to list assignments in a group + - fs/resctrl: Introduce the interface to modify assignments in a group + - fs/resctrl: Disable BMEC event configuration when mbm_event mode is + enabled + - fs/resctrl: Introduce the interface to switch between monitor modes + - x86/resctrl: Configure mbm_event mode if supported + - MAINTAINERS: resctrl: Add myself as reviewer + - fs/resctrl: Fix counter auto-assignment on mkdir with mbm_event enabled + - NVIDIA: SAUCE: DT: cacheinfo: Expose the code to generate a cache-id + from a device_node + - NVIDIA: SAUCE: ACPI / PPTT: Add a helper to fill a cpumask from a + processor container + - NVIDIA: SAUCE: ACPI / PPTT: Stop acpi_count_levels() expecting callers + to clear levels + - NVIDIA: SAUCE: ACPI / PPTT: Find cache level by cache-id + - NVIDIA: SAUCE: ACPI / PPTT: Add a helper to fill a cpumask from a + cache_id + - NVIDIA: SAUCE: DROP: ACPI / PPTT: Add a for_each_acpi_pptt_entry() + helper + - NVIDIA: SAUCE: arm64: kconfig: Add Kconfig entry for MPAM + - NVIDIA: SAUCE: ACPI / MPAM: Parse the MPAM table + - NVIDIA: SAUCE: DT: dt-bindings: arm: Add MPAM MSC binding + - NVIDIA: SAUCE: arm_mpam: Add probe/remove for mpam msc driver and kbuild + boiler plate + - NVIDIA: SAUCE: arm_mpam: parse resources + - NVIDIA: SAUCE: DT: arm_mpam: Add support for memory controller MSC on DT + platforms + - NVIDIA: SAUCE: arm_mpam: Add the class and component structures for + firmware described ris + - NVIDIA: SAUCE: arm_mpam: Add MPAM MSC register layout definitions + - NVIDIA: SAUCE: arm_mpam: Add cpuhp callbacks to probe MSC hardware + - NVIDIA: SAUCE: arm_mpam: Probe hardware to find the supported partid/pmg + values + - NVIDIA: SAUCE: arm_mpam: Add helpers for managing the locking around the + mon_sel registers + - NVIDIA: SAUCE: arm_mpam: Probe the hardware features resctrl supports + - NVIDIA: SAUCE: arm_mpam: Merge supported features during mpam_enable() + into mpam_class + - NVIDIA: SAUCE: arm_mpam: Reset MSC controls from cpuhp callbacks + - NVIDIA: SAUCE: arm_mpam: Add a helper to touch an MSC from any CPU + - NVIDIA: SAUCE: arm_mpam: Extend reset logic to allow devices to be reset + any time + - NVIDIA: SAUCE: arm_mpam: Register and enable IRQs + - NVIDIA: SAUCE: arm_mpam: Use a static key to indicate when mpam is + enabled + - NVIDIA: SAUCE: arm_mpam: Allow configuration to be applied and restored + during cpu online + - NVIDIA: SAUCE: arm_mpam: Probe and reset the rest of the features + - NVIDIA: SAUCE: arm_mpam: Add helpers to allocate monitors + - NVIDIA: SAUCE: arm_mpam: Add mpam_msmon_read() to read monitor value + - NVIDIA: SAUCE: fixup for _msmon_read, reported by Zeng + - NVIDIA: SAUCE: arm_mpam: Track bandwidth counter state for overflow and + power management + - NVIDIA: SAUCE: arm_mpam: Probe for long/lwd mbwu counters + - NVIDIA: SAUCE: arm_mpam: Use long MBWU counters if supported + - NVIDIA: SAUCE: arm_mpam: Add helper to reset saved mbwu state + - NVIDIA: SAUCE: arm_mpam: Add kunit test for bitmap reset + - NVIDIA: SAUCE: arm_mpam: Add kunit tests for props_mismatch() + - NVIDIA: SAUCE: arm64: mpam: Context switch the MPAM registers + - NVIDIA: SAUCE: arm64: mpam: Re-initialise MPAM regs when CPU comes + online + - NVIDIA: SAUCE: arm64: mpam: Advertise the CPUs MPAM limits to the driver + - NVIDIA: SAUCE: arm64: mpam: Add cpu_pm notifier to restore MPAM sysregs + - NVIDIA: SAUCE: arm64: mpam: Add helpers to change a tasks and cpu mpam + partid/pmg values + - NVIDIA: SAUCE: cacheinfo: Add helper to find the cache size from + 
cpu+level + - NVIDIA: SAUCE: arm_mpam: resctrl: Add boilerplate cpuhp and domain + allocation + - NVIDIA: SAUCE: arm_mpam: resctrl: Pick the caches we will use as resctrl + resources + - NVIDIA: SAUCE: arm_mpam: resctrl: Implement + resctrl_arch_reset_all_ctrls() + - NVIDIA: SAUCE: arm_mpam: resctrl: Add resctrl_arch_get_config() + - NVIDIA: SAUCE: arm_mpam: resctrl: Implement helpers to update + configuration + - NVIDIA: SAUCE: arm_mpam: resctrl: Add plumbing against arm64 task and + cpu hooks + - NVIDIA: SAUCE: arm_mpam: resctrl: Add CDP emulation + - NVIDIA: SAUCE: arm_mpam: resctrl: Add rmid index helpers + - NVIDIA: SAUCE: arm_mpam: resctrl: Convert to/from MPAMs bitmaps and + fixed-point formats + - NVIDIA: SAUCE: arm_mpam: resctrl: Add support for 'MB' resource + - NVIDIA: SAUCE: arm_mpam: resctrl: Reject oversized memory bandwidth + portion bitmaps + - NVIDIA: SAUCE: arm_mpam: resctrl: Fix MB min_bandwidth value exposed to + userspace + - NVIDIA: SAUCE: arm_mpam: resctrl: Add kunit test for control format + conversions + - NVIDIA: SAUCE: arm_mpam: resctrl: Add support for csu counters + - NVIDIA: SAUCE: untested: arm_mpam: resctrl: pick classes for use as mbm + counters + - NVIDIA: SAUCE: arm_mpam: resctrl: Pre-allocate free running monitors + - NVIDIA: SAUCE: arm_mpam: resctrl: Pre-allocate assignable monitors + - NVIDIA: SAUCE: arm_mpam: resctrl: Add kunit test for ABMC/CDP + interactions + - NVIDIA: SAUCE: arm_mpam: resctrl: Add resctrl_arch_config_cntr() for + ABMC use + - NVIDIA: SAUCE: arm_mpam: resctrl: Allow resctrl to allocate monitors + - NVIDIA: SAUCE: arm_mpam: resctrl: Add resctrl_arch_rmid_read() and + resctrl_arch_reset_rmid() + - NVIDIA: SAUCE: arm_mpam: resctrl: Add resctrl_arch_cntr_read() & + resctrl_arch_reset_cntr() + - NVIDIA: SAUCE: untested: arm_mpam: resctrl: Allow monitors to be + configured with filters + - NVIDIA: SAUCE: arm_mpam: resctrl: Add empty definitions for fine-grained + enables + - NVIDIA: SAUCE: arm64: mpam: Select ARCH_HAS_CPU_RESCTRL + - NVIDIA: SAUCE: fs/resctrl: Don't touch rmid_ptrs[] in free_rmid() when + there are no monitors + - NVIDIA: SAUCE: fs/resctrl: Avoid a race with dom_data_exit() and + closid_num_dirty_rmid[] + - NVIDIA: SAUCE: fs/resctrl: Avoid a race with dom_data_exit() and + rmid_ptrs[] + - NVIDIA: SAUCE: perf/arm-cmn: Stop claiming all the resources + - NVIDIA: SAUCE: arm_mpam: resctrl: Call resctrl_init() on platforms that + can support resctrl + - NVIDIA: SAUCE: arm_mpam: resctrl: Call resctrl_exit() in the event of + errors + - NVIDIA: SAUCE: arm_mpam: resctrl: Update the rmid reallocation limit + - NVIDIA: SAUCE: arm_mpam: resctrl: Sort the order of the domain lists + - NVIDIA: SAUCE: arm_mpam: Generate a configuration for min controls + - NVIDIA: SAUCE: arm_mpam: Add quirk framework + - NVIDIA: SAUCE: arm_mpam: Add workaround for T241-MPAM-1 + - NVIDIA: SAUCE: arm_mpam: Add workaround for T241-MPAM-4 + - NVIDIA: SAUCE: arm_mpam: Add workaround for T241-MPAM-6 + - NVIDIA: SAUCE: arm_mpam: Quirk CMN-650's CSU NRDY behaviour + - NVIDIA: SAUCE: debugfs: Add helpers for creating cpumask entries in + debugfs + - NVIDIA: SAUCE: arm_mpam: Add debugfs entries to show the MSC/RIS the + driver discovered + - NVIDIA: SAUCE: arm_mpam: Add force-disable debugfs trigger + - NVIDIA: SAUCE: arm_mpam: Expose the number of NRDY retries in debugfs + - NVIDIA: SAUCE: arm_mpam: Add resctrl_arch_round_bw() + - NVIDIA: SAUCE: fs/resctrl,x86/resctrl: Factor mba rounding to be per- + arch + - NVIDIA: SAUCE: arm_mpam: Split the locking around 
the mon_sel registers + - NVIDIA: SAUCE: arm_mpam: Relax num_rmids parameter advertised to + userspace + - NVIDIA: SAUCE: arm_mpam: Allow the maximum partid to be overridden from + the command line + - NVIDIA: SAUCE: arm_mpam: Allow MSC to be forced to have an unknown + location + - NVIDIA: SAUCE: fs/resctrl: Add this_is_not_abi mount option + - NVIDIA: SAUCE: iommu/arm-smmu-v3: Register SMMU capabilities with MPAM + - NVIDIA: SAUCE: iommu/arm-smmu-v3: Add mpam helpers to query and set + state + - NVIDIA: SAUCE: iommu: Add helpers to get and set the QoS state + - NVIDIA: SAUCE: iommu: Add helpers to retrieve iommu_groups by id or + kobject + - NVIDIA: SAUCE: iommu: Add helper to retrieve iommu kset + - NVIDIA: SAUCE: kobject: Add kset_get_next_obj() to allow a kset to be + walked + - NVIDIA: SAUCE: arm_mpam: resctrl: Add iommu helpers to get/set the + partid and pmg + - NVIDIA: SAUCE: fs/resctrl: Add support for assigning iommu_groups to + resctrl groups + - NVIDIA: SAUCE: firmware: arm_scmi: add MPAM-FB SCMI protocol stub + - NVIDIA: SAUCE: arm_mpam: add MPAM-FB MSC firmware access support + - NVIDIA: SAUCE: arm_mpam: Allow duplicate PCC subspace_ids + - NVIDIA: SAUCE: untested: mpam: Convert pcc_channels list to XArray and + cleanup + - NVIDIA: SAUCE: x86/resctrl: Add stub to allow other architecture to + disable monitor overflow + - NVIDIA: SAUCE: arm_mpam: resctrl: Determine if any exposed counter can + overflow + - NVIDIA: SAUCE: fs/restrl: Allow the overflow handler to be disabled + - NVIDIA: SAUCE: fs/resctrl: Uniform data type of + component_id/domid/id/cache_id + - NVIDIA: SAUCE: arm_mpam: Allow cmax/cmin to be configured + - NVIDIA: SAUCE: arm_mpam: Rename mbw conversion to 'fract16' for code re- + use + - NVIDIA: SAUCE: fs/resctrl: Group all the MBA specific properties in a + separate struct + - NVIDIA: SAUCE: fs/resctrl: Abstract duplicate domain test to a helper + - NVIDIA: SAUCE: fs/resctrl: Move MBA supported check to parse_line() + instead of parse_bw() + - NVIDIA: SAUCE: fs/resctrl: Rename resctrl_get_default_ctrl() to include + resource + - NVIDIA: SAUCE: fs/resctrl: Add a schema format to the schema, allowing + it to be different + - NVIDIA: SAUCE: fs/resctrl: Use schema format to check the resource is a + bitmap + - NVIDIA: SAUCE: fs/resctrl: Add specific schema types for 'range' + - NVIDIA: SAUCE: x86/resctrl: Move over to specifying MBA control formats + - NVIDIA: SAUCE: arm_mpam: resctrl: Convert MB resource to use percentage + - NVIDIA: SAUCE: fs/resctrl: Remove 'range' schema format + - NVIDIA: SAUCE: fs/resctrl: Add additional files for percentage and + bitmap controls + - NVIDIA: SAUCE: fs/resctrl: Add fflags_from_schema() for files based on + schema format + - NVIDIA: SAUCE: fs/resctrl: Expose the schema format to user-space + - NVIDIA: SAUCE: fs/resctrl: Add L2 and L3 'MAX' resource schema + - NVIDIA: SAUCE: arm_mpam: resctrl: Add the glue code to convert to/from + cmax + - NVIDIA: SAUCE: mm,memory_hotplug: Add lockdep assertion helper + - NVIDIA: SAUCE: fs/resctrl: Take memory hotplug lock whenever taking CPU + hotplug lock + - NVIDIA: SAUCE: fs/resctrl: Add mount option for mb_uses_numa_nid and + arch stubs + - NVIDIA: SAUCE: Fix unused variable warning + - NVIDIA: SAUCE: arm_mpam: resctrl: Pick whether MB can use NUMA nid + instead of cache-id + - NVIDIA: SAUCE: arm_mpam: resctrl: Change domain_hdr online/offline to + work with a set of CPUs + - NVIDIA: SAUCE: untested: arm_mpam: resctrl: Split + mpam_resctrl_alloc_domain() to have CPU and node + - NVIDIA: 
SAUCE: arm_mpam: resctrl: Add NUMA node notifier for domain + online/offline + - NVIDIA: SAUCE: untested: arm_mpam: resctrl: Allow resctrl to enable NUMA + nid as MB domain-id + - NVIDIA: SAUCE: [Config] RESCTRL configs added to annotations + - NVIDIA: SAUCE: arm_mpam: Fix missing SHIFT definitions + - NVIDIA: SAUCE: Fix partid_max range issue + - x86,fs/resctrl: Fix NULL pointer dereference with events force-disabled + in mbm_event mode + - NVIDIA: SAUCE: [Config] Update RESCTRL annotations + - NVIDIA: SAUCE: arm_mpam: resctrl: Fix MPAM kunit + - NVIDIA: SAUCE: resctrl/mpam: Align packed mpam_props to fix arm64 KUnit + alignment fault + - NVIDIA: SAUCE: resctrl/tests: mpam_devices: compare only meaningful + bytes of mpam_props + + * Add patches to Fix CPU_CYCLES counting on SMT cores by avoiding + PMCCNTR_EL0 (LP: #2136812) + - perf: arm_pmuv3: Factor out PMCCNTR_EL0 use conditions + - perf: arm_pmuv3: Don't use PMCCNTR_EL0 on SMT cores + + [ Ubuntu: 6.17.0-14.14 ] + + * questing/linux: 6.17.0-14.14 -proposed tracker (LP: #2137849) + * Packaging resync (LP: #1786013) + - [Packaging] debian.master/dkms-versions -- update from kernel-versions + (main/2026.01.12) + * ubuntu_kselftests:_net/net:gre_gso.sh failing (LP: #2136820) + - SAUCE increase socat timeout in gre_gso.sh + * ubuntu_blktrace_smoke_test fails on questing with rust coreutils + (LP: #2137698) + - SAUCE: Revert "ext4: fail unaligned direct IO write with EINVAL" + * bareudp.sh in ubuntu_kselftests_net fails because of dash default shell + (LP: #2129812) + - selftests: net: use BASH for bareudp testing + * CVE-2025-40256 + - xfrm: also call xfrm_state_delete_tunnel at destroy time for states that + were never added + * Enable PMF on AMD HPT/STX/KRK (LP: #2125022) + - platform/x86/amd/pmf: Add support for adjusting PMF PPT and PPT APU + thresholds + - platform/x86/amd/pmf: Extend custom BIOS inputs for more policies + - platform/x86/amd/pmf: Update ta_pmf_action structure member + - platform/x86/amd/pmf: Add helper to verify BIOS input notifications are + enable/disable + - platform/x86/amd/pmf: Add custom BIOS input support for AMD_CPU_ID_PS + - platform/x86/amd/pmf: Preserve custom BIOS inputs for evaluating the + policies + - platform/x86/amd/pmf: Call enact function sooner to process early + pending requests + - platform/x86/amd/pmf: Add debug logs for pending requests and custom + BIOS inputs + * Questing update: v6.17.8 upstream stable release (LP: #2136850) + - iommufd/selftest: Fix ioctl return value in _test_cmd_trigger_vevents() + - drm/mediatek: Add pm_runtime support for GCE power control + - drm/i915: Fix conversion between clock ticks and nanoseconds + - drm/amdgpu: set default gfx reset masks for gfx6-8 + - drm/amd/display: Don't stretch non-native images by default in eDP + - smb: client: fix refcount leak in smb2_set_path_attr + - iommufd: Make vfio_compat's unmap succeed if the range is already empty + - futex: Optimize per-cpu reference counting + - drm/amd: Fix suspend failure with secure display TA + - drm/xe: Move declarations under conditional branch + - drm/xe: Do clean shutdown also when using flr + - drm/amd/display: Add pixel_clock to amd_pp_display_configuration + - drm/amd/pm: Use pm_display_cfg in legacy DPM (v2) + - drm/amd/display: Disable fastboot on DCE 6 too + - drm/amd/pm: Disable MCLK switching on SI at high pixel clocks + - drm/amd: Disable ASPM on SI + - arm64: kprobes: check the return value of set_memory_rox() + - compiler_types: Move unused static inline functions warning to W=2 + - riscv: 
Build loader.bin exclusively for Canaan K210 + - RISC-V: clear hot-unplugged cores from all task mm_cpumasks to avoid + rfence errors + - riscv: acpi: avoid errors caused by probing DT devices when ACPI is used + - fs: return EOPNOTSUPP from file_setattr/file_getattr syscalls + - ASoC: nau8821: Avoid unnecessary blocking in IRQ handler + - NFS4: Fix state renewals missing after boot + - drm/amdkfd: fix suspend/resume all calls in mes based eviction path + - NFS4: Apply delay_retrans to async operations + - HID: intel-thc-hid: intel-quickspi: Add ARL PCI Device Id's + - HID: quirks: avoid Cooler Master MM712 dongle wakeup bug + - ixgbe: handle IXGBE_VF_GET_PF_LINK_STATE mailbox operation + - HID: nintendo: Wait longer for initial probe + - NFS: check if suid/sgid was cleared after a write as needed + - HID: quirks: Add ALWAYS_POLL quirk for VRS R295 steering wheel + - io_uring: fix unexpected placement on same size resizing + - HID: logitech-hidpp: Add HIDPP_QUIRK_RESET_HI_RES_SCROLL + - ASoC: max98090/91: fixed max98091 ALSA widget powering up/down + - ALSA: hda/realtek: Fix mute led for HP Omen 17-cb0xxx + - ixgbe: handle IXGBE_VF_FEATURES_NEGOTIATE mbox cmd + - wifi: ath11k: zero init info->status in wmi_process_mgmt_tx_comp() + - selftests: net: local_termination: Wait for interfaces to come up + - net: fec: correct rx_bytes statistic for the case SHIFT16 is set + - net: phy: micrel: Introduce lanphy_modify_page_reg + - net: phy: micrel: Replace hardcoded pages with defines + - net: phy: micrel: lan8814 fix reset of the QSGMII interface + - rust: Add -fno-isolate-erroneous-paths-dereference to + bindgen_skip_c_flags + - NFSD: Skip close replay processing if XDR encoding fails + - Bluetooth: 6lowpan: fix BDADDR_LE vs ADDR_LE_DEV address type confusion + - Bluetooth: 6lowpan: Don't hold spin lock over sleeping functions + - Bluetooth: hci_conn: Fix not cleaning up PA_LINK connections + - net: dsa: tag_brcm: do not mark link local traffic as offloaded + - net/smc: fix mismatch between CLC header and proposal + - net/handshake: Fix memory leak in tls_handshake_accept() + - net: ethernet: ti: am65-cpsw-qos: fix IET verify/response timeout + - net: ethernet: ti: am65-cpsw-qos: fix IET verify retry mechanism + - net: mdio: fix resource leak in mdiobus_register_device() + - wifi: mac80211: skip rate verification for not captured PSDUs + - Bluetooth: hci_event: Fix not handling PA Sync Lost event + - net/mlx5e: Fix missing error assignment in mlx5e_xfrm_add_state() + - net/mlx5e: Fix maxrate wraparound in threshold between units + - net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps + - net/mlx5e: Fix potentially misleading debug message + - net/mlx5: Fix typo of MLX5_EQ_DOORBEL_OFFSET + - net/mlx5: Store the global doorbell in mlx5_priv + - net/mlx5e: Prepare for using different CQ doorbells + - net_sched: limit try_bulk_dequeue_skb() batches + - wifi: iwlwifi: mvm: fix beacon template/fixed rate + - wifi: iwlwifi: mld: always take beacon ies in link grading + - virtio-net: fix incorrect flags recording in big mode + - hsr: Fix supervision frame sending on HSRv0 + - hsr: Follow standard for HSRv0 supervision frames + - ACPI: CPPC: Detect preferred core availability on online CPUs + - ACPI: CPPC: Check _CPC validity for only the online CPUs + - ACPI: CPPC: Perform fast check switch only for online CPUs + - ACPI: CPPC: Limit perf ctrs in PCC check only to online CPUs + - cpufreq: intel_pstate: Check IDA only before MSR_IA32_PERF_CTL writes + - Bluetooth: L2CAP: export 
l2cap_chan_hold for modules + - io_uring/rsrc: don't use blk_rq_nr_phys_segments() as number of bvecs + - acpi,srat: Fix incorrect device handle check for Generic Initiator + - regulator: fixed: fix GPIO descriptor leak on register failure + - ASoC: cs4271: Fix regulator leak on probe failure + - ASoC: codecs: va-macro: fix resource leak in probe error path + - drm/vmwgfx: Restore Guest-Backed only cursor plane support + - ASoC: tas2781: fix getting the wrong device number + - pnfs: Fix TLS logic in _nfs4_pnfs_v3_ds_connect() + - pnfs: Fix TLS logic in _nfs4_pnfs_v4_ds_connect() + - pnfs: Set transport security policy to RPC_XPRTSEC_NONE unless using TLS + - simplify nfs_atomic_open_v23() + - NFSv2/v3: Fix error handling in nfs_atomic_open_v23() + - NFS: sysfs: fix leak when nfs_client kobject add fails + - NFSv4: Fix an incorrect parameter when calling nfs4_call_sync() + - drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO + - acpi/hmat: Fix lockdep warning for hmem_register_resource() + - ASoC: rsnd: fix OF node reference leak in rsnd_ssiu_probe() + - drm/client: fix MODULE_PARM_DESC string for "active" + - irqchip/riscv-intc: Add missing free() callback in riscv_intc_domain_ops + - lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN + - hostfs: Fix only passing host root in boot stage with new mount + - afs: Fix dynamic lookup to fail on cell lookup failure + - mtd: onenand: Pass correct pointer to IRQ handler + - virtio-fs: fix incorrect check for fsvq->kobj + - fs/namespace: correctly handle errors returned by grab_requested_mnt_ns + - perf header: Write bpf_prog (infos|btfs)_cnt to data file + - perf build: Don't fail fast path feature detection when binutils-devel + is not available + - perf lock: Fix segfault due to missing kernel map + - perf test shell lock_contention: Extra debug diagnostics + - perf test: Fix lock contention test + - arm64: dts: rockchip: Set correct pinctrl for I2S1 8ch TX on odroid-m1 + - arm64: dts: rockchip: Fix PCIe power enable pin for BigTreeTech CB2 and + Pi2 + - arm64: dts: rockchip: Make RK3588 GPU OPP table naming less generic + - ARM: dts: imx6ull-engicam-microgea-rmm: fix report-rate-hz value + - ARM: dts: imx51-zii-rdu1: Fix audmux node names + - arm64: dts: imx8-ss-img: Avoid gpio0_mipi_csi GPIOs being deferred + - arm64: dts: imx8mp-kontron: Fix USB OTG role switching + - HID: hid-ntrig: Prevent memory leak in ntrig_report_version() + - ARM: dts: BCM53573: Fix address of Luxul XAP-1440's Ethernet PHY + - arm64: dts: rockchip: Fix USB power enable pin for BTT CB2 and Pi2 + - arm64: dts: rockchip: drop reset from rk3576 i2c9 node + - pwm: adp5585: Correct mismatched pwm chip info + - HID: playstation: Fix memory leak in dualshock4_get_calibration_data() + - HID: uclogic: Fix potential memory leak in error path + - LoongArch: KVM: Restore guest PMU if it is enabled + - LoongArch: KVM: Add delay until timer interrupt injected + - LoongArch: KVM: Fix max supported vCPUs set with EIOINTC + - KVM: arm64: Make all 32bit ID registers fully writable + - KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated + - KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv() + - KVM: nSVM: Fix and simplify LBR virtualization handling with nested + - KVM: VMX: Fix check for valid GVA on an EPT violation + - nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes + - gcov: add support for GCC 15 + - kho: warn and exit when unpreserved page wasn't preserved + - strparser: Fix signed/unsigned mismatch bug + - 
dma-mapping: benchmark: Restore padding to ensure uABI remained + consistent + - maple_tree: fix tracepoint string pointers + - LoongArch: Consolidate early_ioremap()/ioremap_prot() + - LoongArch: Use correct accessor to read FWPC/MWPC + - LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY + - mm/damon/sysfs: change next_update_jiffies to a global variable + - selftests/tracing: Run sample events to clear page cache events + - wifi: mac80211: reject address change while connecting + - mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 + order + - mm/mm_init: fix hash table order logging in alloc_large_system_hash() + - mm/damon/stat: change last_refresh_jiffies to a global variable + - mm/kmsan: fix kmsan kmalloc hook when no stack depots are allocated yet + - mm/shmem: fix THP allocation and fallback loop + - mm/mremap: honour writable bit in mremap pte batching + - mm/huge_memory: fix folio split check for anon folios in swapcache + - mmc: sdhci-of-dwcmshc: Change DLL_STRBIN_TAPNUM_DEFAULT to 0x4 + - mmc: pxamci: Simplify pxamci_probe() error handling using devm APIs + - mmc: dw_mmc-rockchip: Fix wrong internal phase calculate + - ASoC: sdw_utils: fix device reference leak in is_sdca_endpoint_present() + - crypto: hisilicon/qm - Fix device reference leak in qm_get_qos_value + - smb: client: fix cifs_pick_channel when channel needs reconnect + - spi: Try to get ACPI GPIO IRQ earlier + - x86/microcode/AMD: Add Zen5 model 0x44, stepping 0x1 minrev + - x86/CPU/AMD: Add additional fixed RDSEED microcode revisions + - selftests/user_events: fix type cast for write_index packed member in + perf_test + - gendwarfksyms: Skip files with no exports + - ftrace: Fix BPF fexit with livepatch + - LoongArch: Consolidate max_pfn & max_low_pfn calculation + - LoongArch: Use physical addresses for CSR_MERRENTRY/CSR_TLBRENTRY + - EDAC/altera: Handle OCRAM ECC enable after warm reset + - EDAC/altera: Use INTTEST register for Ethernet and USB SBE injection + - PM: hibernate: Emit an error when image writing fails + - PM: hibernate: Use atomic64_t for compressed_size variable + - btrfs: zoned: fix conventional zone capacity calculation + - btrfs: zoned: fix stripe width calculation + - btrfs: scrub: put bio after errors in scrub_raid56_parity_stripe() + - btrfs: do not update last_log_commit when logging inode due to a new + name + - btrfs: release root after error in data_reloc_print_warning_inode() + - drm/amdkfd: relax checks for over allocation of save area + - drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM + surfaces + - drm/i915/psr: fix pipe to vblank conversion + - drm/xe/xe3lpg: Extend Wa_15016589081 for xe3lpg + - drm/xe/xe3: Extend wa_14023061436 + - drm/xe/xe3: Add WA_14024681466 for Xe3_LPG + - pmdomain: imx: Fix reference count leak in imx_gpc_remove + - pmdomain: samsung: plug potential memleak during probe + - pmdomain: samsung: Rework legacy splash-screen handover workaround + - selftests: mptcp: connect: fix fallback note due to OoO + - selftests: mptcp: join: rm: set backup flag + - selftests: mptcp: join: endpoints: longer transfer + - selftests: mptcp: connect: trunc: read all recv data + - selftests: mptcp: join: userspace: longer transfer + - selftests: mptcp: join: properly kill background tasks + - mm/huge_memory: do not change split_huge_page*() target order silently + - mm/memory: do not populate page table entries beyond i_size + - scripts/decode_stacktrace.sh: symbol: avoid trailing whitespaces + - scripts/decode_stacktrace.sh: 
symbol: preserve alignment + - scripts/decode_stacktrace.sh: fix build ID and PC source parsing + - ASoC: da7213: Convert to DEFINE_RUNTIME_DEV_PM_OPS() + - ASoC: da7213: Use component driver suspend/resume + - KVM: x86: Rename local "ecx" variables to "msr" and "pmc" as appropriate + - KVM: x86: Add support for RDMSR/WRMSRNS w/ immediate on Intel + - KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL + - isdn: mISDN: hfcsusb: fix memory leak in hfcsusb_probe() + - net: phy: micrel: Fix lan8814_config_init + - Linux 6.17.9 + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68204 + - pmdomain: arm: scmi: Fix genpd leak on provider registration failure + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68203 + - drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40267 + - io_uring/rw: ensure allocated iovec gets cleared for early failure + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68198 + - crash: fix crashkernel resource shrink + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68199 + - codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for + slabobj_ext + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40268 + - cifs: client: fix memory leak in smb3_fs_context_parse_param + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40269 + - ALSA: usb-audio: Fix potential overflow of PCM transfer buffer + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68205 + - ALSA: hda/hdmi: Fix breakage at probing nvhdmi-mcp driver + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40270 + - mm, swap: fix potential UAF issue for VMA readahead + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40271 + - fs/proc: fix uaf in proc_readdir_de() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40272 + - mm/secretmem: fix use-after-free race in fault handler + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68245 + - net: netpoll: fix incorrect refcount handling causing incorrect cleanup + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68240 + - nilfs2: avoid having an active sc_timer before freeing sci + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68241 + - ipv4: route: Prevent rt_bind_exception() from rebinding stale fnhe + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68211 + - ksm: use range-walk function to jump over holes in + scan_get_next_rmap_item + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68246 + - ksmbd: close accepted socket when per-IP limit rejects connection + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40273 + - NFSD: free copynotify stateid in nfs4_free_ol_stateid() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40212 + - nfsd: fix refcount leak in nfsd_set_fh_dentry() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40274 + - KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68202 + 
- sched_ext: Fix unsafe locking in the scx_dump_state() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68239 + - binfmt_misc: restore write access before closing files opened by + open_exec() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68247 + - posix-timers: Plug potential memory leak in do_timer_create() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68208 + - bpf: account for current allocated stack depth in + widen_imprecise_scalars() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68200 + - bpf: Add bpf_prog_run_data_pointers() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40275 + - ALSA: usb-audio: Fix NULL pointer dereference in + snd_usb_mixer_controls_badd + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68242 + - NFS: Fix LTP test failures when timestamps are delegated + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68243 + - NFS: Check the TLS certificate fields in nfs_match_client() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40276 + - drm/panthor: Flush shmem writes before mapping buffers CPU-uncached + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40277 + - drm/vmwgfx: Validate command header size against SVGA_CMD_MAX_DATASIZE + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68206 + - netfilter: nft_ct: add seqadj extension for natted connections + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68209 + - mlx5: Fix default values in create CQ + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40278 + - net: sched: act_ife: initialize struct tc_ife to fix KMSAN kernel- + infoleak + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40279 + - net: sched: act_connmark: initialize struct tc_ife to fix kernel leak + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40280 + - tipc: Fix use-after-free in tipc_mon_reinit_self(). 
+ * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40281 + - sctp: prevent possible shift-out-of-bounds in sctp_transport_update_rto + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40282 + - Bluetooth: 6lowpan: reset link-local header on ipv6 recv path + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40283 + - Bluetooth: btusb: reorder cleanup in btusb_disconnect to avoid UAF + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40284 + - Bluetooth: MGMT: cancel mesh send timer when hdev removed + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68210 + - erofs: avoid infinite loop due to incomplete zstd-compressed data + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40285 + - smb/server: fix possible refcount leak in smb2_sess_setup() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40286 + - smb/server: fix possible memory leak in smb2_read() + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40287 + - exfat: fix improper check of dentry.stream.valid_size + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40288 + - drm/amdgpu: Fix NULL pointer dereference in VRAM logic for APU devices + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-40289 + - drm/amdgpu: hide VRAM sysfs attributes on GPUs without VRAM + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68201 + - drm/amdgpu: remove two invalid BUG_ON()s + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68207 + - drm/xe/guc: Synchronize Dead CT worker with unbind + * Questing update: v6.17.8 upstream stable release (LP: #2136850) // + CVE-2025-68244 + - drm/i915: Avoid lock inversion when pinning to GGTT on CHV/BXT+VTD + * Questing update: v6.17.8 upstream stable release (LP: #2136833) + - Revert "Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()" + - sched_ext: Mark scx_bpf_dsq_move_set_[slice|vtime]() with KF_RCU + - net: usb: asix_devices: Check return value of usbnet_get_endpoints + - fbdev: atyfb: Check if pll_ops->init_pll failed + - ACPI: button: Call input_free_device() on failing input device + registration + - ACPI: fan: Use platform device for devres-related actions + - virtio-net: drop the multi-buffer XDP packet in zerocopy + - batman-adv: Release references to inactive interfaces + - Bluetooth: rfcomm: fix modem control handling + - net: phy: dp83867: Disable EEE support as not implemented + - fbdev: pvr2fb: Fix leftover reference to ONCHIP_NR_DMA_CHANNELS + - fbdev: valkyriefb: Fix reference count leak in valkyriefb_init + - mptcp: drop bogus optimization in __mptcp_check_push() + - mptcp: restore window probe + - ASoC: qdsp6: q6asm: do not sleep while atomic + - ASoC: renesas: rz-ssi: Use proper dma_buffer_pos after resume + - s390/pci: Restore IRQ unconditionally for the zPCI device + - x86/build: Disable SSE4a + - wifi: ath10k: Fix memory leak on unsupported WMI command + - wifi: ath11k: Add missing platform IDs for quirk table + - wifi: ath12k: free skb during idr cleanup callback + - wifi: ath11k: avoid bit operation on key flags + - drm/msm/a6xx: Fix GMU firmware parser + - ALSA: usb-audio: fix control pipe direction + - ASoC: cs-amp-lib-test: Fix missing include of kunit/test-bug.h + - wifi: mac80211: reset FILS discovery and unsol probe 
resp intervals + - wifi: mac80211: fix key tailroom accounting leak + - wifi: nl80211: call kfree without a NULL check + - kunit: test_dev_action: Correctly cast 'priv' pointer to long* + - scsi: ufs: core: Initialize value of an attribute returned by uic cmd + - scsi: core: Fix the unit attention counter implementation + - bpf: Do not audit capability check in do_jit() + - nvmet-auth: update sc_c in host response + - crypto: s390/phmac - Do not modify the req->nbytes value + - ASoC: Intel: avs: Unprepare a stream when XRUN occurs + - ASoC: fsl_sai: fix bit order for DSD format + - ASoC: fsl_micfil: correct the endian format for DSD + - libbpf: Fix powerpc's stack register definition in bpf_tracing.h + - ASoC: mediatek: Fix double pm_runtime_disable in remove functions + - Bluetooth: ISO: Fix BIS connection dst_type handling + - Bluetooth: btmtksdio: Add pmctrl handling for BT closed state during + reset + - Bluetooth: HCI: Fix tracking of advertisement set/instance 0x00 + - Bluetooth: ISO: Fix another instance of dst_type handling + - Bluetooth: btintel_pcie: Fix event packet loss issue + - Bluetooth: hci_conn: Fix connection cleanup with BIG with 2 or more BIS + - Bluetooth: hci_core: Fix tracking of periodic advertisement + - bpf: Conditionally include dynptr copy kfuncs + - drm/msm: Ensure vm is created in VM_BIND ioctl + - ALSA: usb-audio: add mono main switch to Presonus S1824c + - ALSA: usb-audio: don't log messages meant for 1810c when initializing + 1824c + - ACPI: MRRM: Check revision of MRRM table + - drm/etnaviv: fix flush sequence logic + - tools: ynl: fix string attribute length to include null terminator + - net: hns3: return error code when function fails + - sfc: fix potential memory leak in efx_mae_process_mport() + - tools: ynl: avoid print_field when there is no reply + - dpll: spec: add missing module-name and clock-id to pin-get reply + - ASoC: fsl_sai: Fix sync error in consumer mode + - ASoC: soc_sdw_utils: remove cs42l43 component_name + - drm/amd/pm: fix smu table id bound check issue in smu_cmn_update_table() + - drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Fiji + - drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Iceland + - drm/amdgpu: fix SPDX headers on amdgpu_cper.c/h + - drm/amdgpu: fix SPDX header on amd_cper.h + - drm/amdgpu: fix SPDX header on irqsrcs_vcn_5_0.h + - ACPI: fan: Use ACPI handle when retrieving _FST + - block: fix op_is_zone_mgmt() to handle REQ_OP_ZONE_RESET_ALL + - block: make REQ_OP_ZONE_OPEN a write operation + - dma-fence: Fix safe access wrapper to call timeline name method + - kbuild: align modinfo section for Secureboot Authenticode EDK2 compat + - regmap: irq: Correct documentation of wake_invert flag + - [Config] Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP for s390x + - s390/mm: Fix memory leak in add_marker() when kvrealloc() fails + - drm/xe: Do not wake device during a GT reset + - drm/sched: avoid killing parent entity on child SIGKILL + - drm/sched: Fix race in drm_sched_entity_select_rq() + - drm/nouveau: Fix race in nouveau_sched_fini() + - drm/i915/dmc: Clear HRR EVT_CTL/HTP to zero on ADL-S + - drm/ast: Clear preserved bits from register output value + - drm/amd: Check that VPE has reached DPM0 in idle handler + - drm/amd/display: Fix incorrect return of vblank enable on unconfigured + crtc + - drm/amd/display: Don't program BLNDGAM_MEM_PWR_FORCE when CM low-power + is disabled on DCN30 + - drm/amd/display: Add HDR workaround for a specific eDP + - mptcp: leverage skb deferral free + - mptcp: fix 
MSG_PEEK stream corruption + - cpuidle: governors: menu: Rearrange main loop in menu_select() + - cpuidle: governors: menu: Select polling state in some more cases + - PM: hibernate: Combine return paths in power_down() + - PM: sleep: Allow pm_restrict_gfp_mask() stacking + - mfd: kempld: Switch back to earlier ->init() behavior + - soc: aspeed: socinfo: Add AST27xx silicon IDs + - firmware: qcom: scm: preserve assign_mem() error return value + - soc: qcom: smem: Fix endian-unaware access of num_entries + - spi: loopback-test: Don't use %pK through printk + - spi: spi-qpic-snand: handle 'use_ecc' parameter of + qcom_spi_config_cw_read() + - soc: ti: pruss: don't use %pK through printk + - bpf: Don't use %pK through printk + - mmc: sdhci: Disable SD card clock before changing parameters + - pinctrl: single: fix bias pull up/down handling in pin_config_set + - mmc: host: renesas_sdhi: Fix the actual clock + - memstick: Add timeout to prevent indefinite waiting + - cpufreq: ti: Add support for AM62D2 + - bpf: Use tnums for JEQ/JNE is_branch_taken logic + - firmware: ti_sci: Enable abort handling of entry to LPM + - firewire: ohci: move self_id_complete tracepoint after validating + register + - irqchip/sifive-plic: Respect mask state when setting affinity + - irqchip/loongson-eiointc: Route interrupt parsed from bios table + - ACPI: sysfs: Use ACPI_FREE() for freeing an ACPI object + - ACPI: video: force native for Lenovo 82K8 + - libbpf: Fix USDT SIB argument handling causing unrecognized register + error + - selftests/bpf: Fix bpf_prog_detach2 usage in test_lirc_mode2 + - arm64: versal-net: Update rtc calibration value + - Revert "UBUNTU: SAUCE: firmware: qcom: scm: Allow QSEECOM on Dell + Inspiron 7441 / Latitude 7455" + - firmware: qcom: scm: Allow QSEECOM on Dell Inspiron 7441 / Latitude 7455 + - kselftest/arm64: tpidr2: Switch to waitpid() over wait4() + - arc: Fix __fls() const-foldability via __builtin_clzl() + - selftests/bpf: Upon failures, exit with code 1 in test_xsk.sh + - irqchip/gic-v2m: Handle Multiple MSI base IRQ Alignment + - ACPI: PRM: Skip handlers with NULL handler_address or NULL VA + - ACPI: resource: Skip IRQ override on ASUS Vivobook Pro N6506CU + - ACPI: scan: Add Intel CVS ACPI HIDs to acpi_ignore_dep_ids[] + - thermal: gov_step_wise: Allow cooling level to be reduced earlier + - thermal: intel: selftests: workload_hint: Mask unsupported types + - power: supply: qcom_battmgr: add OOI chemistry + - hwmon: (k10temp) Add thermal support for AMD Family 1Ah-based models + - hwmon: (k10temp) Add device ID for Strix Halo + - hwmon: (lenovo-ec-sensors) Update P8 supprt + - hwmon: (sbtsi_temp) AMD CPU extended temperature range support + - pinctrl: renesas: rzg2l: Add suspend/resume support for Schmitt control + registers + - pinctrl: keembay: release allocated memory in detach path + - power: supply: sbs-charger: Support multiple devices + - io_uring/rsrc: respect submitter_task in io_register_clone_buffers() + - hwmon: sy7636a: add alias + - selftests/bpf: Fix incorrect array size calculation + - block: check for valid bio while splitting + - irqchip/loongson-pch-lpc: Use legacy domain for PCH-LPC IRQ controller + - cpufreq: ondemand: Update the efficient idle check for Intel extended + Families + - arm64: zynqmp: Disable coresight by default + - arm64: zynqmp: Revert usb node drive strength and slew rate for zcu106 + - soc/tegra: fuse: Add Tegra114 nvmem cells and fuse lookups + - ARM: tegra: p880: set correct touchscreen clipping + - ARM: tegra: transformer-20: add 
missing magnetometer interrupt + - ARM: tegra: transformer-20: fix audio-codec interrupt + - firmware: qcom: tzmem: disable sc7180 platform + - soc: ti: k3-socinfo: Add information for AM62L SR1.1 + - mmc: sdhci-msm: Enable tuning for SDR50 mode for SD card + - pwm: pca9685: Use bulk write to atomicially update registers + - ACPICA: dispatcher: Use acpi_ds_clear_operands() in + acpi_ds_call_control_method() + - tee: allow a driver to allocate a tee_device without a pool + - kunit: Enable PCI on UML without triggering WARN() + - selftests/bpf: Fix arena_spin_lock selftest failure + - bpf: Do not limit bpf_cgroup_from_id to current's namespace + - i3c: mipi-i3c-hci-pci: Add support for Intel Wildcat Lake-U I3C + - rust: kunit: allow `cfg` on `test`s + - video: backlight: lp855x_bl: Set correct EPROM start for LP8556 + - i3c: dw: Add shutdown support to dw_i3c_master driver + - io_uring/zcrx: check all niovs filled with dma addresses + - tools/cpupower: fix error return value in cpupower_write_sysfs() + - io_uring/zcrx: account niov arrays to cgroup + - pmdomain: apple: Add "apple,t8103-pmgr-pwrstate" + - power: supply: qcom_battmgr: handle charging state change notifications + - bpftool: Fix -Wuninitialized-const-pointer warnings with clang >= 21 + - cpuidle: Fail cpuidle device registration if there is one already + - selftests/bpf: Fix selftest verifier_arena_large failure + - selftests: ublk: fix behavior when fio is not installed + - spi: rpc-if: Add resume support for RZ/G3E + - ACPI: SPCR: Support Precise Baud Rate field + - clocksource/drivers/vf-pit: Replace raw_readl/writel to readl/writel + - clocksource/drivers/timer-rtl-otto: Work around dying timers + - clocksource/drivers/timer-rtl-otto: Do not interfere with interrupts + - riscv: bpf: Fix uninitialized symbol 'retval_off' + - bpf: Clear pfmemalloc flag when freeing all fragments + - selftests: drv-net: Pull data before parsing headers + - nvme: Use non zero KATO for persistent discovery connections + - uprobe: Do not emulate/sstep original instruction when ip is changed + - hwmon: (asus-ec-sensors) increase timeout for locking ACPI mutex + - hwmon: (dell-smm) Remove Dell Precision 490 custom config data + - hwmon: (dell-smm) Add support for Dell OptiPlex 7040 + - tools/cpupower: Fix incorrect size in cpuidle_state_disable() + - selftests/bpf: Fix flaky bpf_cookie selftest + - tools/power turbostat: Fix incorrect sorting of PMT telemetry + - tools/power x86_energy_perf_policy: Fix incorrect fopen mode usage + - tools/power x86_energy_perf_policy: Enhance HWP enable + - tools/power x86_energy_perf_policy: Prefer driver HWP limits + - mfd: simple-mfd-i2c: Add compatible strings for Layerscape QIXIS FPGA + - mfd: stmpe: Remove IRQ domain upon removal + - mfd: stmpe-i2c: Add missing MODULE_LICENSE + - mfd: qnap-mcu: Handle errors returned from qnap_mcu_write + - mfd: qnap-mcu: Include linux/types.h in qnap-mcu.h shared header + - mfd: madera: Work around false-positive -Wininitialized warning + - mfd: da9063: Split chip variant reading in two bus transactions + - mfd: macsmc: Add "apple,t8103-smc" compatible + - mfd: core: Increment of_node's refcount before linking it to the + platform device + - mfd: cs42l43: Move IRQ enable/disable to encompass force suspend + - mfd: intel-lpss: Add Intel Wildcat Lake LPSS PCI IDs + - drm/xe/ptl: Apply Wa_16026007364 + - drm/xe/configfs: Enforce canonical device names + - drm/amd/display: Update tiled to tiled copy command + - drm/amd/display: fix condition for setting timing_adjust_pending + - 
drm/amd/display: ensure committing streams is seamless + - drm/amdgpu: add range check for RAS bad page address + - drm/amdgpu: Check vcn sram load return value + - drm/amd/display: Remove check DPIA HPD status for BW Allocation + - drm/amd/display: Increase AUX Intra-Hop Done Max Wait Duration + - drm/amd/display: Fix dmub_cmd header alignment + - drm/xe/guc: Add more GuC load error status codes + - drm/xe/pf: Don't resume device from restart worker + - drm/amdgpu: Fix build error when CONFIG_SUSPEND is disabled + - drm/amdgpu: Update IPID value for bad page threshold CPER + - drm/amdgpu: Avoid rma causes GPU duplicate reset + - drm/amdgpu: Effective health check before reset + - drm/amd/amdgpu: Release xcp drm memory after unplug + - drm/amdgpu: Fix vcn v5.0.1 poison irq call trace + - drm/xe: Extend wa_13012615864 to additional Xe2 and Xe3 platforms + - drm/amdgpu: Skip poison aca bank from UE channel + - drm/amd/display: add more cyan skillfish devices + - drm/amdgpu: Initialize jpeg v5_0_1 ras function + - drm/amdgpu: skip mgpu fan boost for multi-vf + - drm/amd/display: fix dmub access race condition + - drm/amd/display: update dpp/disp clock from smu clock table + - drm/amd/pm: Use cached metrics data on aldebaran + - drm/amd/pm: Use cached metrics data on arcturus + - accel/amdxdna: Unify pm and rpm suspend and resume callbacks + - drm/amdgpu/jpeg: Hold pg_lock before jpeg poweroff + - drm/xe/pf: Program LMTT directory pointer on all GTs within a tile + - drm/nouveau: replace snprintf() with scnprintf() in nvkm_snprintbf() + - ASoC: tas2781: Add keyword "init" in profile section + - ASoC: mediatek: Use SND_JACK_AVOUT for HDMI/DP jacks + - drm/amd/display: Reset apply_eamless_boot_optimization when dpms_off + - drm/amdgpu: add to custom amdgpu_drm_release drm_dev_enter/exit + - drm/amd/display: Wait until OTG enable state is cleared + - drm/xe: rework PDE PAT index selection + - docs: kernel-doc: avoid script crash on ancient Python + - drm/sharp-memory: Do not access GEM-DMA vaddr directly + - PCI: Disable MSI on RDC PCI to PCIe bridges + - drm/nouveau: always set RMDevidCheckIgnore for GSP-RM + - drm/panel-edp: Add SHP LQ134Z1 panel for Dell XPS 9345 + - selftests/net: Replace non-standard __WORDSIZE with sizeof(long) * 8 + - selftests/net: Ensure assert() triggers in psock_tpacket.c + - wifi: rtw89: print just once for unknown C2H events + - wifi: rtw88: sdio: use indirect IO for device registers before power-on + - wifi: rtw89: add dummy C2H handlers for BCN resend and update done + - drm/amdkfd: return -ENOTTY for unsupported IOCTLs + - selftests: drv-net: devmem: add / correct the IPv6 support + - selftests: drv-net: devmem: flip the direction of Tx tests + - media: pci: ivtv: Don't create fake v4l2_fh + - media: amphion: Delete v4l2_fh synchronously in .release() + - drm/tidss: Use the crtc_* timings when programming the HW + - drm/bridge: cdns-dsi: Fix REG_WAKEUP_TIME value + - drm/bridge: cdns-dsi: Don't fail on MIPI_DSI_MODE_VIDEO_BURST + - drm/tidss: Set crtc modesetting parameters with adjusted mode + - drm/tidss: Remove early fb + - RDMA/mana_ib: Drain send wrs of GSI QP + - media: i2c: Kconfig: Ensure a dependency on HAVE_CLK for + VIDEO_CAMERA_SENSOR + - PCI/ERR: Update device error_state already after reset + - x86/vsyscall: Do not require X86_PF_INSTR to emulate vsyscall + - net: stmmac: Check stmmac_hw_setup() in stmmac_resume() + - ice: Don't use %pK through printk or tracepoints + - thunderbolt: Use is_pciehp instead of is_hotplug_bridge + - ASoC: es8323: enable 
DAPM power widgets for playback DAC and output + - powerpc/eeh: Use result of error_detected() in uevent + - s390/pci: Use pci_uevent_ers() in PCI recovery + - bridge: Redirect to backup port when port is administratively down + - selftests: drv-net: wait for carrier + - net: phy: mscc: report and configure in-band auto-negotiation for + SGMII/QSGMII + - scsi: ufs: host: mediatek: Fix auto-hibern8 timer configuration + - scsi: ufs: host: mediatek: Fix PWM mode switch issue + - scsi: ufs: host: mediatek: Assign power mode userdata before FASTAUTO + mode change + - scsi: ufs: host: mediatek: Change reset sequence for improved stability + - scsi: ufs: host: mediatek: Fix invalid access in vccqx handling + - gpu: nova-core: register: allow fields named `offset` + - drm/panthor: Serialize GPU cache flush operations + - HID: pidff: Use direction fix only for conditional effects + - HID: pidff: PERMISSIVE_CONTROL quirk autodetection + - drm/bridge: display-connector: don't set OP_DETECT for DisplayPorts + - drm/amdkfd: Handle lack of READ permissions in SVM mapping + - drm/amdgpu: refactor bad_page_work for corner case handling + - hwrng: timeriomem - Use us_to_ktime() where appropriate + - iio: adc: spear_adc: mask SPEAR_ADC_STATUS channel and avg sample before + setting register + - iio: adc: imx93_adc: load calibrated values even calibration failed + - usb: gadget: f_ncm: Fix MAC assignment NCM ethernet + - ASoC: es8323: remove DAC enablement write from es8323_probe + - ASoC: es8323: add proper left/right mixer controls via DAPM + - ASoC: codecs: wsa883x: Handle shared reset GPIO for WSA883x speakers + - drm/xe: Make page size consistent in loop + - wifi: rtw89: wow: remove notify during WoWLAN net-detect + - wifi: rtw89: fix BSSID comparison for non-transmitted BSSID + - wifi: rtw89: 8851b: rfk: update IQK TIA setting + - dm error: mark as DM_TARGET_PASSES_INTEGRITY + - char: misc: Make misc_register() reentry for miscdevice who wants + dynamic minor + - char: misc: Does not request module for miscdevice with dynamic minor + - net: When removing nexthops, don't call synchronize_net if it is not + necessary + - net: Call trace_sock_exceed_buf_limit() for memcg failure with + SK_MEM_RECV. 
+ - dmaengine: idxd: Add a new IAA device ID for Wildcat Lake family + platforms + - PCI/P2PDMA: Fix incorrect pointer usage in devm_kfree() call + - bnxt_en: Add Hyper-V VF ID + - tty: serial: Modify the use of dev_err_probe() + - ALSA: usb-audio: Add validation of UAC2/UAC3 effect units + - Octeontx2-af: Broadcast XON on all channels + - idpf: do not linearize big TSO packets + - drm/xe/pcode: Initialize data0 for pcode read routine + - drm/panel: ilitek-ili9881c: turn off power-supply when init fails + - drm/panel: ilitek-ili9881c: move display_on/_off dcs calls to + (un-)prepare + - rds: Fix endianness annotation for RDS_MPATH_HASH + - net: wangxun: limit tx_max_coalesced_frames_irq + - iio: imu: bmi270: Match PNP ID found on newer GPD firmware + - media: ipu6: isys: Set embedded data type correctly for metadata formats + - rpmsg: char: Export alias for RPMSG ID rpmsg-raw from table + - net: ipv4: allow directed broadcast routes to use dst hint + - scsi: mpi3mr: Fix device loss during enclosure reboot due to zero link + speed + - wifi: rtw89: coex: Limit Wi-Fi scan slot cost to avoid A2DP glitch + - scsi: mpi3mr: Fix I/O failures during controller reset + - scsi: mpi3mr: Fix controller init failure on fault during queue creation + - scsi: pm80xx: Fix race condition caused by static variables + - extcon: adc-jack: Fix wakeup source leaks on device unbind + - extcon: fsa9480: Fix wakeup source leaks on device unbind + - extcon: axp288: Fix wakeup source leaks on device unbind + - drm/xe: Set GT as wedged before sending wedged uevent + - remoteproc: wkup_m3: Use devm_pm_runtime_enable() helper + - drm/xe/wcl: Extend L3bank mask workaround + - net: phy: fixed_phy: let fixed_phy_unregister free the phy_device + - selftests: drv-net: hds: restore hds settings + - fuse: zero initialize inode private data + - virtio_fs: fix the hash table using in virtio_fs_enqueue_req() + - selftests: pci_endpoint: Skip IRQ test if IRQ is out of range. 
+ - drm/xe: Ensure GT is in C0 during resumes + - misc: pci_endpoint_test: Skip IRQ tests if irq is out of range + - drm/amdgpu: Correct the loss of aca bank reg info + - drm/amdgpu: Correct the counts of nr_banks and nr_errors + - drm/amdkfd: fix vram allocation failure for a special case + - drm/amd/display: Support HW cursor 180 rot for any number of pipe splits + - drm/amdkfd: Tie UNMAP_LATENCY to queue_preemption + - drm/amd/display: wait for otg update pending latch before clock + optimization + - drm/amd/display: Consider sink max slice width limitation for dsc + - drm/amdgpu/vpe: cancel delayed work in hw_fini + - drm/xe: Cancel pending TLB inval workers on teardown + - net: Prevent RPS table overwrite of active flows + - eth: fbnic: Reset hw stats upon PCI error + - wifi: iwlwifi: mld: trigger mlo scan only when not in EMLSR + - platform/x86/intel-uncore-freq: Fix warning in partitioned system + - drm/msm/dpu: Filter modes based on adjusted mode clock + - drm/msm: Use of_reserved_mem_region_to_resource() for "memory-region" + - selftests: drv-net: rss_ctx: fix the queue count check + - media: fix uninitialized symbol warnings + - media: pci: mgb4: Fix timings comparison in VIDIOC_S_DV_TIMINGS + - ASoC: SOF: ipc4-pcm: Add fixup for channels + - drm/amdgpu: Notify pmfw bad page threshold exceeded + - drm/amd/display: Increase minimum clock for TMDS 420 with pipe splitting + - drm/amdgpu: Avoid jpeg v5.0.1 poison irq call trace on sriov guest + - drm/amd/display: incorrect conditions for failing dto calculations + - drm/amdgpu: Avoid vcn v5.0.1 poison irq call trace on sriov guest + - drm/amdgpu: Respect max pixel clock for HDMI and DVI-D (v2) + - mips: lantiq: danube: add missing properties to cpu node + - mips: lantiq: danube: add model to EASY50712 dts + - mips: lantiq: danube: add missing device_type in pci node + - mips: lantiq: xway: sysctrl: rename stp clock + - mips: lantiq: danube: rename stp node on EASY50712 reference board + - inet_diag: annotate data-races in inet_diag_bc_sk() + - microchip: lan865x: add ndo_eth_ioctl handler to enable PHY ioctl + support + - crypto: qat - use kcalloc() in qat_uclo_map_objs_from_mof() + - scsi: pm8001: Use int instead of u32 to store error codes + - iio: adc: ad7124: do not require mclk + - scsi: ufs: exynos: fsd: Gate ref_clk and put UFS device in reset on + suspend + - media: imx-mipi-csis: Only set clock rate when specified in DT + - wifi: iwlwifi: pcie: remember when interrupts are disabled + - drm/st7571-i2c: add support for inverted pixel format + - ptp: Limit time setting of PTP clocks + - dmaengine: sh: setup_xref error handling + - dmaengine: mv_xor: match alloc_wc and free_wc + - dmaengine: dw-edma: Set status for callback_result + - netfilter: nf_tables: all transaction allocations can now sleep + - drm/msm/dsi/phy: Toggle back buffer resync after preparing PLL + - drm/msm/dsi/phy_7nm: Fix missing initial VCO rate + - drm/amdgpu: Allow kfd CRIU with no buffer objects + - drm/xe/guc: Increase GuC crash dump buffer size + - drm/amd/pm: Increase SMC timeout on SI and warn (v3) + - move_mount(2): take sanity checks in 'beneath' case into do_lock_mount() + - selftests: drv-net: rss_ctx: make the test pass with few queues + - ipv6: Add sanity checks on ipv6_devconf.rpl_seg_enabled + - drm/xe: Extend Wa_22021007897 to Xe3 platforms + - wifi: mac80211: count reg connection element in the size + - drm/panthor: check bo offset alignment in vm bind + - drm: panel-backlight-quirks: Make EDID match optional + - ixgbe: reduce number of 
reads when getting OROM data + - netlink: specs: fou: change local-v6/peer-v6 check + - net: nfc: nci: Increase NCI_DATA_TIMEOUT to 3000 ms + - media: adv7180: Add missing lock in suspend callback + - media: adv7180: Do not write format to device in set_fmt + - media: adv7180: Only validate format in querystd + - media: verisilicon: Explicitly disable selection api ioctls for decoders + - wifi: mac80211: Fix 6 GHz Band capabilities element advertisement in + lower bands + - platform/x86: think-lmi: Add extra TC BIOS error messages + - platform/x86/intel-uncore-freq: Present unique domain ID per package + - ALSA: usb-audio: apply quirk for MOONDROP Quark2 + - PCI: imx6: Enable the Vaux supply if available + - drm/xe/guc: Set upper limit of H2G retries over CTB + - net: call cond_resched() less often in __release_sock() + - smsc911x: add second read of EEPROM mac when possible corruption seen + - drm/xe: improve dma-resv handling for backup object + - iommu/amd: Add support to remap/unmap IOMMU buffers for kdump + - iommu/amd: Skip enabling command/event buffers for kdump + - iommu/amd: Reuse device table for kdump + - crypto: ccp: Skip SEV and SNP INIT for kdump boot + - iommu/apple-dart: Clear stream error indicator bits for T8110 DARTs + - bus: mhi: host: pci_generic: Add support for all Foxconn T99W696 SKU + variants + - drm/amdgpu: Correct info field of bad page threshold exceed CPER + - drm/amd: add more cyan skillfish PCI ids + - drm/amdgpu: don't enable SMU on cyan skillfish + - drm/amdgpu: add support for cyan skillfish gpu_info + - drm/amd/display: Fix pbn_div Calculation Error + - drm/amd/display: dont wait for pipe update during medupdate/highirq + - drm/amd/pm: refine amdgpu pm sysfs node error code + - drm/amd/display: Indicate when custom brightness curves are in use + - selftests: ncdevmem: don't retry EFAULT + - net: dsa: felix: support phy-mode = "10g-qxgmii" + - usb: gadget: f_hid: Fix zero length packet transfer + - serial: qcom-geni: Add DFS clock mode support to GENI UART driver + - serdev: Drop dev_pm_domain_detach() call + - tty/vt: Add missing return value for VT_RESIZE in vt_ioctl() + - eeprom: at25: support Cypress FRAMs without device ID + - drm/msm/adreno: Add speedbins for A663 GPU + - drm/msm: Fix 32b size truncation + - dt-bindings: display/msm/gmu: Update Adreno 623 bindings + - drm/msm: make sure to not queue up recovery more than once + - char: Use list_del_init() in misc_deregister() to reinitialize list + pointer + - drm/msm/adreno: Add speedbin data for A623 GPU + - drm/msm/adreno: Add fenced regwrite support + - drm/msm/a6xx: Switch to GMU AO counter + - idpf: link NAPIs to queues + - selftests: net: make the dump test less sensitive to mem accounting + - PCI: endpoint: pci-epf-test: Limit PCIe BAR size for fixed BARs + - wifi: rtw89: Add USB ID 2001:332a for D-Link AX9U rev. A1 + - wifi: rtw89: Add USB ID 2001:3327 for D-Link AX18U rev. 
A1 + - wifi: iwlwifi: fw: Add ASUS to PPAG and TAS list + - drm/xe/i2c: Enable bus mastering + - media: ov08x40: Fix the horizontal flip control + - media: i2c: og01a1b: Specify monochrome media bus format instead of + Bayer + - media: qcom: camss: csiphy-3ph: Add CSIPHY 2ph DPHY v2.0.1 init sequence + - drm/bridge: write full Audio InfoFrame + - drm/xe/guc: Always add CT disable action during second init step + - f2fs: fix wrong layout information on 16KB page + - selftests: mptcp: join: allow more time to send ADD_ADDR + - scsi: ufs: host: mediatek: Enhance recovery on resume failure + - scsi: ufs: ufs-qcom: Align programming sequence of Shared ICE for UFS + controller v5 + - scsi: ufs: host: mediatek: Fix unbalanced IRQ enable issue + - scsi: ufs: host: mediatek: Enhance recovery on hibernation exit failure + - net: phy: marvell: Fix 88e1510 downshift counter errata + - scsi: ufs: host: mediatek: Correct system PM flow + - scsi: ufs: host: mediatek: Disable auto-hibern8 during power mode + changes + - scsi: ufs: host: mediatek: Fix adapt issue after PA_Init + - wifi: cfg80211: update the time stamps in hidden ssid + - wifi: mac80211: Fix HE capabilities element check + - fbcon: Use screen info to find primary device + - phy: cadence: cdns-dphy: Enable lower resolutions in dphy + - Fix access to video_is_primary_device() when compiled without + CONFIG_VIDEO + - phy: renesas: r8a779f0-ether-serdes: add new step added to latest + datasheet + - phy: rockchip: phy-rockchip-inno-csidphy: allow writes to grf register 0 + - drm/msm/registers: Generate _HI/LO builders for reg64 + - net: sh_eth: Disable WoL if system can not suspend + - selftests: net: replace sleeps in fcnal-test with waits + - media: redrat3: use int type to store negative error codes + - platform/x86/amd/pmf: Fix the custom bios input handling mechanism + - selftests: traceroute: Use require_command() + - selftests: traceroute: Return correct value on failure + - openrisc: Add R_OR1K_32_PCREL relocation type module support + - netfilter: nf_reject: don't reply to icmp error messages + - x86/kvm: Prefer native qspinlock for dedicated vCPUs irrespective of + PV_UNHALT + - x86/virt/tdx: Use precalculated TDVPR page physical address + - selftests: Disable dad for ipv6 in fcnal-test.sh + - eth: 8139too: Make 8139TOO_PIO depend on !NO_IOPORT_MAP + - [Config] No longer enable `CONFIG_8139TOO_PIO` for armhf + - selftests: Replace sleep with slowwait + - net: devmem: expose tcp_recvmsg_locked errors + - selftests: net: lib.sh: Don't defer failed commands + - HID: asus: add Z13 folio to generic group for multitouch to work + - watchdog: s3c2410_wdt: Fix max_timeout being calculated larger + - crypto: sun8i-ce - remove channel timeout field + - PCI: dwc: Verify the single eDMA IRQ in dw_pcie_edma_irq_verify() + - crypto: ccp - Fix incorrect payload size calculation in + psp_poulate_hsti() + - crypto: caam - double the entropy delay interval for retry + - can: rcar_canfd: Update bit rate constants for RZ/G3E and R-Car Gen4 + - net: mana: Reduce waiting time if HWC not responding + - ionic: use int type for err in ionic_get_module_eeprom_by_page + - net/cls_cgroup: Fix task_get_classid() during qdisc run + - wifi: mt76: mt7921: Add 160MHz beamformee capability for mt7922 device + - wifi: mt76: mt7925: add pci restore for hibernate + - wifi: mt76: mt7996: Fix mt7996_reverse_frag0_hdr_trans for MLO + - wifi: mt76: mt7996: Set def_wcid pointer in mt7996_mac_sta_init_link() + - wifi: mt76: mt7996: Temporarily disable EPCS + - wifi: mt76: 
mt7996: support writing MAC TXD for AddBA Request + - wifi: mt76: mt76_eeprom_override to int + - ALSA: serial-generic: remove shared static buffer + - wifi: mt76: mt7996: fix memory leak on mt7996_mcu_sta_key_tlv error + - wifi: mt76: mt7996: disable promiscuous mode by default + - wifi: mt76: use altx queue for offchannel tx on connac+ + - wifi: mt76: improve phy reset on hw restart + - drm/amdgpu: Use memdup_array_user in amdgpu_cs_wait_fences_ioctl + - drm/amdgpu: Release hive reference properly + - drm/amd/display: Fix DMCUB loading sequence for DCN3.2 + - drm/amd/display: Set up pixel encoding for YCBCR422 + - drm/amd/display: fix dml ms order of operations + - drm/amd/display: Don't use non-registered VUPDATE on DCE 6 + - drm/amd/display: Keep PLL0 running on DCE 6.0 and 6.4 + - drm/amd/display: Fix DVI-D/HDMI adapters + - drm/amd/display: Disable VRR on DCE 6 + - drm/amd/display/dml2: Guard dml21_map_dc_state_into_dml_display_cfg with + DC_FP_START + - net: phy: clear EEE runtime state in PHY_HALTED/PHY_ERROR + - ethernet: Extend device_get_mac_address() to use NVMEM + - scsi: ufs: ufs-qcom: Disable lane clocks during phy hibern8 + - HID: i2c-hid: Resolve touchpad issues on Dell systems during S4 + - hinic3: Queue pair endianness improvements + - hinic3: Fix missing napi->dev in netif_queue_set_napi + - tools: ynl-gen: validate nested arrays + - drm/xe/guc: Return an error code if the GuC load fails + - drm/amdgpu: reject gang submissions under SRIOV + - selftests/Makefile: include $(INSTALL_DEP_TARGETS) in clean target to + clean net/lib dependency + - scsi: ufs: core: Disable timestamp functionality if not supported + - scsi: lpfc: Clean up allocated queues when queue setup mbox commands + fail + - scsi: lpfc: Decrement ndlp kref after FDISC retries exhausted + - scsi: lpfc: Check return status of lpfc_reset_flush_io_context during + TGT_RESET + - scsi: lpfc: Remove ndlp kref decrement clause for F_Port_Ctrl in + lpfc_cleanup + - scsi: lpfc: Define size of debugfs entry for xri rebalancing + - scsi: lpfc: Ensure PLOGI_ACC is sent prior to PRLI in Point to Point + topology + - allow finish_no_open(file, ERR_PTR(-E...)) + - usb: mon: Increase BUFF_MAX to 64 MiB to support multi-MB URBs + - usb: xhci: plat: Facilitate using autosuspend for xhci plat devices + - wifi: rtw89: disable RTW89_PHYSTS_IE09_FTR_0 for ppdu status + - wifi: rtw89: obtain RX path from ppdu status IE00 + - wifi: rtw89: renew a completion for each H2C command waiting C2H event + - usb: xhci-pci: add support for hosts with zero USB3 ports + - ipv6: np->rxpmtu race annotation + - RDMA/irdma: Update Kconfig + - IB/ipoib: Ignore L3 master device + - bnxt_en: Add fw log trace support for 5731X/5741X chips + - mei: make a local copy of client uuid in connect + - ASoC: qcom: sc8280xp: explicitly set S16LE format in + sc8280xp_be_hw_params_fixup() + - net: phy: clear link parameters on admin link down + - net: ethernet: microchip: sparx5: make it selectable for ARCH_LAN969X + - bus: mhi: core: Improve mhi_sync_power_up handling for SYS_ERR state + - iommu/vt-d: Replace snprintf with scnprintf in dmar_latency_snapshot() + - wifi: ath10k: Fix connection after GTK rekeying + - iommu/vt-d: Remove LPIG from page group response descriptor + - wifi: mac80211: Get the correct interface for non-netdev skb status + - wifi: mac80211: Track NAN interface start/stop + - net: intel: fm10k: Fix parameter idx set but not used + - sparc/module: Add R_SPARC_UA64 relocation handling + - sparc64: fix prototypes of reads[bwl]() + - vfio: 
return -ENOTTY for unsupported device feature + - ptp_ocp: make ptp_ocp driver compatible with PTP_EXTTS_REQUEST2 + - crypto: hisilicon/qm - invalidate queues in use + - crypto: hisilicon/qm - clear all VF configurations in the hardware + - ASoC: ops: improve snd_soc_get_volsw + - PCI/PM: Skip resuming to D0 if device is disconnected + - selftests: forwarding: Reorder (ar)ping arguments to obey POSIX getopt + - remoteproc: qcom: q6v5: Avoid handling handover twice + - wifi: ath12k: Increase DP_REO_CMD_RING_SIZE to 256 + - net: dsa: microchip: Set SPI as bus interface during reset for KSZ8463 + - bng_en: make bnge_alloc_ring() self-unwind on failure + - ALSA: usb-audio: don't apply interface quirk to Presonus S1824c + - tcp: Update bind bucket state on port release + - ovl: make sure that ovl_create_real() returns a hashed dentry + - drm/amd/display: Add missing post flip calls + - drm/amd/display: Add AVI infoframe copy in copy_stream_update_to_stream + - drm/amd/display: Add fast sync field in ultra sleep more for DMUB + - drm/amd/display: Init dispclk from bootup clock for DCN314 + - drm/amd/display: Fix for test crash due to power gating + - drm/amd/display: change dc stream color settings only in atomic commit + - NFSv4: handle ERR_GRACE on delegation recalls + - NFSv4.1: fix mount hang after CREATE_SESSION failure + - net: bridge: Install FDB for bridge MAC on VLAN 0 + - net: phy: dp83640: improve phydev and driver removal handling + - scsi: ufs: core: Change MCQ interrupt enable flow + - scsi: libfc: Fix potential buffer overflow in fc_ct_ms_fill() + - accel/habanalabs/gaudi2: fix BMON disable configuration + - scsi: mpt3sas: Add support for 22.5 Gbps SAS link rate + - accel/habanalabs: return ENOMEM if less than requested pages were pinned + - accel/habanalabs/gaudi2: read preboot status after recovering from dirty + state + - ASoC: renesas: msiof: add .symmetric_xxx on snd_soc_dai_driver + - ASoC: renesas: msiof: use reset controller + - ASoC: renesas: msiof: tidyup DMAC stop timing + - ASoC: renesas: msiof: set SIFCTR register + - ext4: increase IO priority of fastcommit + - drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails + - drm/amdgpu: Fix fence signaling race condition in userqueue + - ASoC: stm32: sai: manage context in set_sysclk callback + - ASoC: tlv320aic3x: Fix class-D initialization for tlv320aic3007 + - ACPI: scan: Update honor list for RPMI System MSI + - platform/x86: x86-android-tablets: Stop using EPROBE_DEFER + - vfio/pci: Fix INTx handling on legacy non-PCI 2.3 devices + - vfio/nvgrace-gpu: Add GB300 SKU to the devid table + - selftest: net: Fix error message if empty variable + - net/mlx5e: Don't query FEC statistics when FEC is disabled + - Bluetooth: btintel: Add support for BlazarIW core + - net: macb: avoid dealing with endianness in macb_set_hwaddr() + - Bluetooth: btusb: Add new VID/PID 13d3/3627 for MT7925 + - Bluetooth: btintel_pcie: Define hdev->wakeup() callback + - Bluetooth: ISO: Don't initiate CIS connections if there are no buffers + - Bluetooth: btusb: Check for unexpected bytes when defragmenting HCI + frames + - Bluetooth: ISO: Use sk_sndtimeo as conn_timeout + - Bluetooth: btusb: Add new VID/PID 13d3/3633 for MT7922 + - net: stmmac: est: Drop frames causing HLBS error + - exfat: limit log print for IO error + - 6pack: drop redundant locking and refcounting + - page_pool: Clamp pool size to max 16K pages + - net/mlx5e: Prevent entering switchdev mode with inconsistent netns + - ksmbd: use sock_create_kern interface to create kernel 
socket + - smb: client: update cfid->last_access_time in + open_cached_dir_by_dentry() + - smb: client: transport: avoid reconnects triggered by pending task work + - usb: xhci-pci: Fix USB2-only root hub registration + - drm/amd/display: Add fallback path for YCBCR422 + - ACPICA: Update dsmethod.c to get rid of unused variable warning + - RDMA/bnxt_re: Fix a potential memory leak in destroy_gsi_sqp + - RDMA/irdma: Fix SD index calculation + - RDMA/irdma: Remove unused struct irdma_cq fields + - RDMA/irdma: Set irdma_cq cq_num field during CQ create + - RDMA/uverbs: Fix umem release in UVERBS_METHOD_CQ_CREATE + - RDMA/hns: Fix recv CQ and QP cache affinity + - RDMA/hns: Fix the modification of max_send_sge + - RDMA/hns: Fix wrong WQE data when QP wraps around + - btrfs: mark dirty extent range for out of bound prealloc extents + - clk: qcom: gcc-ipq6018: rework nss_port5 clock to multiple conf + - clk: renesas: rzv2h: Re-assert reset on deassert timeout + - clk: samsung: exynos990: Add missing USB clock registers to HSI0 + - fs/hpfs: Fix error code for new_inode() failure in + mkdir/create/mknod/symlink + - clocksource: hyper-v: Skip unnecessary checks for the root partition + - hyperv: Add missing field to hv_output_map_device_interrupt + - um: Fix help message for ssl-non-raw + - clk: sunxi-ng: sun6i-rtc: Add A523 specifics + - rtc: pcf2127: clear minute/second interrupt + - ARM: at91: pm: save and restore ACR during PLL disable/enable + - clk: at91: add ACR in all PLL settings + - clk: at91: sam9x7: Add peripheral clock id for pmecc + - clk: at91: clk-master: Add check for divide by 3 + - clk: at91: clk-sam9x60-pll: force write to PLL_UPDT register + - clk: ti: am33xx: keep WKUP_DEBUGSS_CLKCTRL enabled + - clk: scmi: Add duty cycle ops only when duty cycle is supported + - clk: clocking-wizard: Fix output clock register offset for Versal + platforms + - NTB: epf: Allow arbitrary BAR mapping + - 9p: fix /sys/fs/9p/caches overwriting itself + - cpufreq: tegra186: Initialize all cores to max frequencies + - 9p: sysfs_init: don't hardcode error to ENOMEM + - scsi: ufs: core: Include UTP error in INT_FATAL_ERRORS + - fbdev: core: Fix ubsan warning in pixel_to_pat + - ACPI: property: Return present device nodes only on fwnode interface + - LoongArch: Handle new atomic instructions for probes + - tools bitmap: Add missing asm-generic/bitsperlong.h include + - tools: lib: thermal: don't preserve owner in install + - tools: lib: thermal: use pkg-config to locate libnl3 + - ALSA: hda/realtek: Add quirk for ASUS ROG Zephyrus Duo + - rtc: zynqmp: Restore alarm functionality after kexec transition + - rtc: pcf2127: fix watchdog interrupt mask on pcf2131 + - net: wwan: t7xx: add support for HP DRMR-H01 + - kbuild: uapi: Strip comments before size type check + - ASoC: meson: aiu-encoder-i2s: fix bit clock polarity + - ASoC: rt722: add settings for rt722VB + - drm/amdgpu: Report individual reset error + - ceph: add checking of wait_for_completion_killable() return value + - ceph: fix potential race condition in ceph_ioctl_lazyio() + - ceph: refactor wake_up_bit() pattern of calling + - x86: uaccess: don't use runtime-const rewriting in modules + - rust: condvar: fix broken intra-doc link + - rust: devres: fix private intra-doc link + - rust: kbuild: workaround `rustdoc` doctests modifier bug + - rust: kbuild: treat `build_error` and `rustdoc` as kernel objects + - media: uvcvideo: Use heuristic to find stream entity + - Revert "wifi: ath10k: avoid unnecessary wait for service ready message" + - 
tracing: tprobe-events: Fix to register tracepoint correctly + - tracing: tprobe-events: Fix to put tracepoint_user when disable the + tprobe + - net: libwx: fix device bus LAN ID + - scsi: ufs: core: Fix a race condition related to the "hid" attribute + group + - riscv: ptdump: use seq_puts() in pt_dump_seq_puts() macro + - Revert "wifi: ath12k: Fix missing station power save configuration" + - scsi: ufs: core: Revert "Make HID attributes visible" + - Bluetooth: btrtl: Fix memory leak in rtlbt_parse_firmware_v2() + - net: dsa: tag_brcm: legacy: fix untagged rx on unbridged ports for + bcm63xx + - selftests/net: fix out-of-order delivery of FIN in gro:tcp test + - selftests/net: use destination options instead of hop-by-hop + - selftests: netdevsim: Fix ethtool-coalesce.sh fail by installing + ethtool-common.sh + - net: vlan: sync VLAN features with lower device + - net: dsa: b53: fix resetting speed and pause on forced link + - net: dsa: b53: fix bcm63xx RGMII port link adjustment + - net: dsa: b53: fix enabling ip multicast + - net: dsa: b53: stop reading ARL entries if search is done + - net: dsa: b53: properly bound ARL searches for < 4 ARL bin chips + - sctp: Hold RCU read lock while iterating over address list + - sctp: Hold sock lock while iterating over address list + - net: ionic: add dma_wmb() before ringing TX doorbell + - net: ionic: map SKB after pseudo-header checksum prep + - octeontx2-pf: Fix devm_kcalloc() error checking + - bnxt_en: Fix a possible memory leak in bnxt_ptp_init + - bnxt_en: Always provide max entry and entry size in coredump segments + - bnxt_en: Fix warning in bnxt_dl_reload_down() + - wifi: mac80211_hwsim: Limit destroy_on_close radio removal to netgroup + - io_uring: fix types for region size calulation + - net/mlx5e: Fix return value in case of module EEPROM read error + - net: ti: icssg-prueth: Fix fdb hash size configuration + - net/mlx5e: SHAMPO, Fix header mapping for 64K pages + - net/mlx5e: SHAMPO, Fix skb size check for 64K pages + - net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and 64K pages + - net: wan: framer: pef2256: Switch to devm_mfd_add_devices() + - net: dsa: microchip: Fix reserved multicast address table programming + - net: bridge: fix MST static key usage + - selftests/vsock: avoid false-positives when checking dmesg + - tracing: Fix memory leaks in create_field_var() + - drm/amd/display: Enable mst when it's detected but yet to be initialized + - wifi: cfg80211: add an hrtimer based delayed work item + - wifi: mac80211: use wiphy_hrtimer_work for ml_reconf_work + - wifi: mac80211: use wiphy_hrtimer_work for ttlm_work + - wifi: mac80211: use wiphy_hrtimer_work for csa.switch_work + - riscv: Fix memory leak in module_frob_arch_sections() + - rtc: rx8025: fix incorrect register reference + - x86/microcode/AMD: Add more known models to entry sign checking + - smb: client: validate change notify buffer before copy + - x86/amd_node: Fix AMD root device caching + - xfs: fix delalloc write failures in software-provided atomic writes + - xfs: fix various problems in xfs_atomic_write_cow_iomap_begin + - x86/CPU/AMD: Add missing terminator for zen5_rdseed_microcode + - drm: define NVIDIA DRM format modifiers for GB20x + - drm/nouveau: Advertise correct modifiers on GB20x + - drm/amdgpu/smu: Handle S0ix for vangogh + - drm/amdkfd: Don't clear PT after process killed + - virtio_net: fix alignment for virtio_net_hdr_v1_hash + - lib/crypto: curve25519-hacl64: Fix older clang KASAN workaround for GCC + - scsi: ufs: ufs-pci: Fix S0ix/S3 
for Intel controllers + - scsi: ufs: ufs-pci: Set UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE for Intel + ADL + - scsi: ufs: core: Add a quirk to suppress link_startup_again + - drm/amd/display: update color on atomic commit time + - extcon: adc-jack: Cleanup wakeup source only if it was enabled + - kunit: Extend kconfig help text for KUNIT_UML_PCI + - ALSA: hda/tas2781: Enable init_profile_id for device initialization + - ACPI: SPCR: Check for table version when using precise baudrate + - kbuild: Strip trailing padding bytes from modules.builtin.modinfo + - drm/amdgpu: Fix unintended error log in VCN5_0_0 + - drm/amd/display: Fix vupdate_offload_work doc + - drm/amdgpu: Fix function header names in amdgpu_connectors.c + - drm/amdgpu/userq: assign an error code for invalid userq va + - drm/msm/dpu: Fix adjusted mode clock check for 3d merge + - drm/amd/display: Reject modes with too high pixel clock on DCE6-10 + - drm/amd/display: use GFP_NOWAIT for allocation in interrupt handler + - drm/amd/display: Fix black screen with HDMI outputs + - selftests: drv-net: Reload pkt pointer after calling filter_udphdr + - dt-bindings: eeprom: at25: use "size" for FRAMs without device ID + - Linux 6.17.8 + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68316 + - scsi: ufs: core: Fix invalid probe error return value + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40292 + - virtio-net: fix received length check in big packets + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68180 + - drm/amd/display: Fix NULL deref in debugfs odm_combine_segments + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40327 + - perf/core: Fix system hang caused by cpu-clock usage + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40328 + - smb: client: fix potential UAF in smb2_close_cached_fid() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40291 + - io_uring: fix regbuf vector size truncation + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68322 + - parisc: Avoid crash due to unaligned access in unwinder + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40293 + - iommufd: Don't overflow during division for dirty tracking + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40294 + - Bluetooth: MGMT: Fix OOB access in parse_adv_monitor_pattern() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40329 + - drm/sched: Fix deadlock in drm_sched_entity_kill_jobs_cb + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40295 + - fscrypt: fix left shift underflow when inode->i_blkbits > PAGE_SHIFT + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40296 + - platform/x86: int3472: Fix double free of GPIO device during unregister + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40297 + - net: bridge: fix use-after-free due to MST port state bypass + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68320 + - lan966x: Fix sleeping in atomic context + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68169 + - netpoll: Fix deadlock in memory allocation under spinlock + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68197 + - bnxt_en: Fix 
null pointer dereference in bnxt_bs_trace_check_wrap() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40330 + - bnxt_en: Shutdown FW DMA in bnxt_shutdown() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68192 + - net: usb: qmi_wwan: initialize MAC header offset in qmimux_rx_fixup + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40331 + - sctp: Prevent TOCTOU out-of-bounds write + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68187 + - net: mdio: Check regmap pointer returned by device_node_to_regmap() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68167 + - gpiolib: fix invalid pointer access in debugfs + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68319 + - netconsole: Acquire su_mutex before navigating configs hierarchy + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40298 + - gve: Implement settime64 with -EOPNOTSUPP + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40299 + - gve: Implement gettimex64 with -EOPNOTSUPP + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40301 + - Bluetooth: hci_event: validate skb length for unknown CC opcode + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40358 + - riscv: stacktrace: Disable KASAN checks for non-current tasks + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68186 + - ring-buffer: Do not warn in ring_buffer_map_get_reader() when reader + catches up + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68184 + - drm/mediatek: Disable AFBC support on Mediatek DRM driver + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40302 + - media: videobuf2: forbid remove_bufs when legacy fileio is active + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40303 + - btrfs: ensure no dirty metadata is written back for an fs with errors + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40362 + - ceph: fix multifs mds auth caps issue + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40332 + - drm/amdkfd: Fix mmap write lock not release + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40304 + - fbdev: Add bounds checking in bit_putcs to fix vmalloc-out-of-bounds + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40305 + - 9p/trans_fd: p9_fd_request: kick rx thread if EPOLLIN + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68318 + - clk: thead: th1520-ap: set all AXI clocks to CLK_IS_CRITICAL + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40209 + - btrfs: fix memory leak of qgroup_list in btrfs_add_qgroup_relation + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68183 + - ima: don't clear IMA_DIGSIG flag when setting or removing non-IMA xattr + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68173 + - ftrace: Fix softlockup in ftrace_module_enable + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40306 + - orangefs: fix xattr related buffer overflow... 
+ * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40307 + - exfat: validate cluster allocation bits of the allocation bitmap + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40308 + - Bluetooth: bcsp: receive data only if registered + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40309 + - Bluetooth: SCO: Fix UAF on sco_conn_free + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68174 + - amd/amdkfd: enhance kfd process check in switch partition + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40310 + - amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40361 + - fs: ext4: change GFP_KERNEL to GFP_NOFS to avoid deadlock + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40311 + - accel/habanalabs: support mapping cb with vmalloc-backed coherent memory + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68185 + - nfs4_setup_readdir(): insufficient locking for ->d_parent->d_inode + dereferencing + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68176 + - PCI: cadence: Check for the existence of cdns_pcie::ops before using it + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68190 + - drm/amdgpu/atom: Check kcalloc() for WS buffer in + amdgpu_atom_execute_table_locked() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68168 + - jfs: fix uninitialized waitqueue in transaction manager + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40312 + - jfs: Verify inode mode when loading from disk + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40333 + - f2fs: fix infinite loop in __insert_extent_tree() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68321 + - page_pool: always add GFP_NOWARN for ATOMIC allocations + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40334 + - drm/amdgpu: validate userq buffer virtual address and size + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68191 + - udp_tunnel: use netdev_warn() instead of netdev_WARN() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68309 + - PCI/AER: Fix NULL pointer access by aer_info + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40313 + - ntfs3: pretend $Extend records as regular files + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40335 + - drm/amdgpu: validate userq input args + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40314 + - usb: cdns3: gadget: Use-after-free during failed initialization and exit + of cdnsp gadget + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40336 + - drm/gpusvm: fix hmm_pfn_to_map_order() usage + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68193 + - drm/xe/guc: Add devm release action to safely tear down CT + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68175 + - media: nxp: imx8-isi: Fix streaming cleanup on release + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68188 + 
- tcp: use dst_dev_rcu() in tcp_fastopen_active_disable_ofo_check() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68315 + - f2fs: fix to detect potential corrupted nid in free_nid_list + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40337 + - net: stmmac: Correctly handle Rx checksum offload errors + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40338 + - ASoC: Intel: avs: Do not share the name pointer between components + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40339 + - drm/amdgpu: fix nullptr err of vm_handle_moved + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68194 + - media: imon: make send_packet() more robust + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40363 + - net: ipv6: fix field-spanning memcpy warning in AH output + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68311 + - tty: serial: ip22zilog: Use platform device for probing + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40340 + - drm/xe: Fix oops in xe_gem_fault when running core_hotunplug test. + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68196 + - drm/amd/display: Cache streams targeting link when performing LT + automation + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68178 + - blk-cgroup: fix possible deadlock while configuring policy + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40341 + - futex: Don't leak robust_list pointer on exec race + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40342 + - nvme-fc: use lock accessing port_state and rport state + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40343 + - nvmet-fc: avoid scheduling association deletion twice + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68177 + - cpufreq/longhaul: handle NULL policy in longhaul_exit + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68317 + - io_uring/zctx: check chained notif contexts + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40315 + - usb: gadget: f_fs: Fix epfile null pointer access after ep enable. 
+ * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40316 + - drm/mediatek: Fix device use-after-free on unbind + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40360 + - drm/sysfb: Do not dereference NULL pointer in plane reset + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68179 + - s390: Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68310 + - s390/pci: Avoid deadlock between PCI error recovery and mlx5 crdump + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40317 + - regmap: slimbus: fix bus_context pointer in regmap init calls + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40359 + - perf/x86/intel: Fix KASAN global-out-of-bounds warning + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68181 + - drm/radeon: Remove calls to drm_put_dev() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68170 + - drm/radeon: Do not kfree() devres managed rdev + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40213 + - Bluetooth: MGMT: fix crash in set_mesh_sync and set_mesh_complete + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40318 + - Bluetooth: hci_sync: fix race in hci_cmd_sync_dequeue_once + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68312 + - usbnet: Prevents free active kevent + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40344 + - ASoC: Intel: avs: Disable periods-elapsed work when closing PCM + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68172 + - crypto: aspeed - fix double free caused by devm + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40319 + - bpf: Sync pending IRQ work before freeing ring buffer + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68182 + - wifi: iwlwifi: fix potential use after free in iwl_mld_remove_link() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68314 + - drm/msm: make sure last_fence is always updated + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68189 + - drm/msm: Fix GEM free for imported dma-bufs + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68171 + - x86/fpu: Ensure XFD state on signal delivery + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-68313 + - x86/CPU/AMD: Add RDSEED fix for Zen5 + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40320 + - smb: client: fix potential cfid UAF in smb2_query_info_compound + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40321 + - wifi: brcmfmac: fix crash while sending Action Frames in standalone AP + Mode + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40322 + - fbdev: bitblit: bound-check glyph index in bit_putcs* + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40211 + - ACPI: video: Fix use-after-free in acpi_video_switch_brightness() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40323 + - fbcon: Set fb_display[i]->mode to NULL when the mode is released + * Questing 
update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40210 + - Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40324 + - NFSD: Fix crash in nfsd4_read_release() + * Questing update: v6.17.8 upstream stable release (LP: #2136833) // + CVE-2025-40326 + - NFSD: Define actions for the new time_deleg FATTR4 attributes + * Questing update: v6.17.7 upstream stable release (LP: #2136813) + - sched_ext: Move internal type and accessor definitions to ext_internal.h + - sched_ext: Put event_stats_cpu in struct scx_sched_pcpu + - sched_ext: Sync error_irq_work before freeing scx_sched + - timekeeping: Fix aux clocks sysfs initialization loop bound + - x86/bugs: Report correct retbleed mitigation status + - x86/bugs: Qualify RETBLEED_INTEL_MSG + - genirq/chip: Add buslock back in to irq_set_handler() + - genirq/manage: Add buslock back in to __disable_irq_nosync() + - genirq/manage: Add buslock back in to enable_irq() + - audit: record fanotify event regardless of presence of rules + - EDAC/ie31200: Add two more Intel Alder Lake-S SoCs for EDAC support + - perf/x86/intel: Add ICL_FIXED_0_ADAPTIVE bit into INTEL_FIXED_BITS_MASK + - perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of + current->mm == NULL + - perf: Have get_perf_callchain() return NULL if crosstask and user are + set + - perf: Skip user unwind if the task is a kernel thread + - EDAC: Fix wrong executable file modes for C source files + - seccomp: passthrough uprobe systemcall without filtering + - sched_ext: Keep bypass on between enable failure and + scx_disable_workfn() + - x86/bugs: Add attack vector controls for VMSCAPE + - x86/bugs: Fix reporting of LFENCE retpoline + - EDAC/mc_sysfs: Increase legacy channel support to 16 + - cpuset: Use new excpus for nocpu error check when enabling root + partition + - btrfs: abort transaction on specific error places when walking log tree + - btrfs: abort transaction in the process_one_buffer() log tree walk + callback + - btrfs: zoned: return error from btrfs_zone_finish_endio() + - btrfs: zoned: refine extent allocator hint selection + - btrfs: scrub: replace max_t()/min_t() with clamp() in + scrub_throttle_dev_io() + - btrfs: always drop log root tree reference in btrfs_replay_log() + - btrfs: use level argument in log tree walk callback replay_one_buffer() + - btrfs: abort transaction if we fail to update inode in log replay dir + fixup + - btrfs: tree-checker: add inode extref checks + - btrfs: use smp_mb__after_atomic() when forcing COW in + create_pending_snapshot() + - sched_ext: Make qmap dump operation non-destructive + - arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c + - btrfs: tree-checker: fix bounds check in check_inode_extref() + - Linux 6.17.7 + * [UBUNTU 24.04] KVM: s390: improve interrupt cpu for wakeup (LP: #2132317) + - KVM: s390: improve interrupt cpu for wakeup + * Questing update: v6.17.6 upstream stable release (LP: #2134982) + - sched/fair: Block delayed tasks on throttled hierarchy during dequeue + - vfio/cdx: update driver to build without CONFIG_GENERIC_MSI_IRQ + - expfs: Fix exportfs_can_encode_fh() for EXPORT_FH_FID + - cgroup/misc: fix misc_res_type kernel-doc warning + - dlm: move to rinfo for all middle conversion cases + - exec: Fix incorrect type for ret + - s390/pkey: Forward keygenflags to ep11_unwrapkey + - hfs: clear offset and space out of valid records in b-tree node + - hfs: make proper initalization of 
struct hfs_find_data + - hfs: validate record offset in hfsplus_bmap_alloc + - hfsplus: fix KMSAN uninit-value issue in hfsplus_delete_cat() + - dlm: check for defined force value in dlm_lockspace_release + - hfsplus: return EIO when type of hidden directory mismatch in + hfsplus_fill_super() + - PCI: Test for bit underflow in pcie_set_readrq() + - lkdtm: fortify: Fix potential NULL dereference on kmalloc failure + - arm64: sysreg: Correct sign definitions for EIESB and DoubleLock + - m68k: bitops: Fix find_*_bit() signatures + - powerpc/32: Remove PAGE_KERNEL_TEXT to fix startup failure + - riscv: mm: Return intended SATP mode for noXlvl options + - riscv: mm: Use mmu-type from FDT to limit SATP mode + - riscv: cpufeature: add validation for zfa, zfh and zfhmin + - drivers/perf: hisi: Relax the event ID check in the framework + - s390/mm: Use __GFP_ACCOUNT for user page table allocations + - smb: client: queue post_recv_credits_work also if the peer raises the + credit target + - smb: client: limit the range of info->receive_credit_target + - smb: client: make use of ib_wc_status_msg() and skip IB_WC_WR_FLUSH_ERR + logging + - smb: server: let smb_direct_flush_send_list() invalidate a remote key + first + - Unbreak 'make tools/*' for user-space targets + - platform/mellanox: mlxbf-pmc: add sysfs_attr_init() to count_clock init + - cpufreq/amd-pstate: Fix a regression leading to EPP 0 after hibernate + - net/mlx5e: Return 1 instead of 0 in invalid case in + mlx5e_mpwrq_umr_entry_size() + - rtnetlink: Allow deleting FDB entries in user namespace + - net: enetc: fix the deadlock of enetc_mdio_lock + - net: enetc: correct the value of ENETC_RXB_TRUESIZE + - dpaa2-eth: fix the pointer passed to PTR_ALIGN on Tx path + - net: phy: realtek: fix rtl8221b-vm-cg name + - can: bxcan: bxcan_start_xmit(): use can_dev_dropped_skb() instead of + can_dropped_invalid_skb() + - can: esd: acc_start_xmit(): use can_dev_dropped_skb() instead of + can_dropped_invalid_skb() + - can: rockchip-canfd: rkcanfd_start_xmit(): use can_dev_dropped_skb() + instead of can_dropped_invalid_skb() + - selftests: net: fix server bind failure in sctp_vrf.sh + - net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for legacy RQ + - net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for striding + RQ + - net/smc: fix general protection fault in __smc_diag_dump + - net: ethernet: ti: am65-cpts: fix timestamp loss due to race conditions + - arm64, mm: avoid always making PTE dirty in pte_mkwrite() + - erofs: avoid infinite loops due to corrupted subpage compact indexes + - net: hibmcge: select FIXED_PHY + - ptp: ocp: Fix typo using index 1 instead of i in SMA initialization loop + - net: hsr: prevent creation of HSR device with slaves from another netns + - espintcp: use datagram_poll_queue for socket readiness + - net: datagram: introduce datagram_poll_queue for custom receive queues + - ovpn: use datagram_poll_queue for socket readiness in TCP + - net: bonding: fix possible peer notify event loss or dup issue + - hung_task: fix warnings caused by unaligned lock pointers + - mm: don't spin in add_stack_record when gfp flags don't allow + - dma-debug: don't report false positives with + DMA_BOUNCE_UNALIGNED_KMALLOC + - arch_topology: Fix incorrect error check in + topology_parse_cpu_capacity() + - riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot + - io_uring/sqpoll: switch away from getrusage() for CPU accounting + - io_uring/sqpoll: be smarter on when to update the stime usage + - btrfs: send: 
fix duplicated rmdir operations when using extrefs + - btrfs: ref-verify: fix IS_ERR() vs NULL check in btrfs_build_ref_tree() + - gpio: pci-idio-16: Define maximum valid register address offset + - gpio: 104-idio-16: Define maximum valid register address offset + - xfs: fix locking in xchk_nlinks_collect_dir + - platform/x86: alienware-wmi-wmax: Add AWCC support to Dell G15 5530 + - Revert "cpuidle: menu: Avoid discarding useful information" + - riscv: cpufeature: avoid uninitialized variable in + has_thead_homogeneous_vlenb() + - rust: device: fix device context of Device::parent() + - slab: Avoid race on slab->obj_exts in alloc_slab_obj_exts + - slab: Fix obj_ext mistakenly considered NULL due to race condition + - smb: client: get rid of d_drop() in cifs_do_rename() + - ACPICA: Work around bogus -Wstringop-overread warning since GCC 11 + - arm64: mte: Do not warn if the page is already tagged in copy_highpage() + - can: netlink: can_changelink(): allow disabling of automatic restart + - cifs: Fix TCP_Server_Info::credits to be signed + - devcoredump: Fix circular locking dependency with devcd->mutex. + - hwmon: (pmbus/max34440) Update adpm12160 coeff due to latest FW + - MIPS: Malta: Fix keyboard resource preventing i8042 driver from + registering + - rv: Make rtapp/pagefault monitor depends on CONFIG_MMU + - net: bonding: update the slave array for broadcast mode + - net: stmmac: dwmac-rk: Fix disabling set_clock_selection + - net: usb: rtl8150: Fix frame padding + - net: ravb: Enforce descriptor type ordering + - net: ravb: Ensure memory write completes before ringing TX doorbell + - mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR + - selftests: mptcp: join: mark 'flush re-add' as skipped if not supported + - selftests: mptcp: join: mark implicit tests as skipped if not supported + - selftests: mptcp: join: mark 'delete re-add signal' as skipped if not + supported + - mm/mremap: correctly account old mapping after MREMAP_DONTUNMAP remap + - drm/xe: Check return value of GGTT workqueue allocation + - drm/amd/display: increase max link count and fix link->enc NULL pointer + access + - mm/damon/core: use damos_commit_quota_goal() for new goal commit + - mm/damon/core: fix list_add_tail() call on damon_call() + - spi: rockchip-sfc: Fix DMA-API usage + - firmware: arm_ffa: Add support for IMPDEF value in the memory access + descriptor + - spi: spi-nxp-fspi: add the support for sample data from DQS pad + - spi: spi-nxp-fspi: re-config the clock rate when operation require new + clock rate + - spi: spi-nxp-fspi: add extra delay after dll locked + - spi: spi-nxp-fspi: limit the clock rate for different sample clock + source selection + - spi: cadence-quadspi: Fix pm_runtime unbalance on dma EPROBE_DEFER + - arm64: dts: broadcom: bcm2712: Add default GIC address cells + - arm64: dts: broadcom: bcm2712: Define VGIC interrupt + - include: trace: Fix inflight count helper on failed initialization + - firmware: arm_scmi: Fix premature SCMI_XFER_FLAG_IS_RAW clearing in raw + mode + - spi: airoha: return an error for continuous mode dirmap creation cases + - spi: airoha: add support of dual/quad wires spi modes to exec_op() + handler + - spi: airoha: switch back to non-dma mode in the case of error + - spi: airoha: fix reading/writing of flashes with more than one plane per + lun + - sysfs: check visibility before changing group attribute ownership + - RISC-V: Define pgprot_dmacoherent() for non-coherent devices + - RISC-V: Don't print details of CPUs disabled in DT + - riscv: hwprobe: avoid 
uninitialized variable use in hwprobe_arch_id() + - hwmon: (pmbus/isl68137) Fix child node reference leak on early return + - hwmon: (sht3x) Fix error handling + - io_uring: fix incorrect unlikely() usage in io_waitid_prep() + - nbd: override creds to kernel when calling sock_{send,recv}msg() + - drm/panic: Fix drawing the logo on a small narrow screen + - drm/panic: Fix qr_code, ensure vmargin is positive + - drm/panic: Fix 24bit pixel crossing page boundaries + - of/irq: Convert of_msi_map_id() callers to of_msi_xlate() + - of/irq: Add msi-parent check to of_msi_xlate() + - block: require LBA dma_alignment when using PI + - gpio: ljca: Fix duplicated IRQ mapping + - io_uring: correct __must_hold annotation in io_install_fixed_file + - sched: Remove never used code in mm_cid_get() + - USB: serial: option: add UNISOC UIS7720 + - USB: serial: option: add Quectel RG255C + - USB: serial: option: add Telit FN920C04 ECM compositions + - usb/core/quirks: Add Huawei ME906S to wakeup quirk + - usb: raw-gadget: do not limit transfer length + - xhci: dbc: enable back DbC in resume if it was enabled before suspend + - xhci: dbc: fix bogus 1024 byte prefix if ttyDBC read races with stall + event + - x86/microcode: Fix Entrysign revision check for Zen1/Naples + - binder: remove "invalid inc weak" check + - mei: me: add wildcat lake P DID + - objtool/rust: add one more `noreturn` Rust function + - nvmem: rcar-efuse: add missing MODULE_DEVICE_TABLE + - misc: fastrpc: Fix dma_buf object leak in fastrpc_map_lookup + - most: usb: hdm_probe: Fix calling put_device() before device + initialization + - tcpm: switch check for role_sw device with fw_node + - dt-bindings: serial: sh-sci: Fix r8a78000 interrupts + - dt-bindings: usb: dwc3-imx8mp: dma-range is required only for imx8mp + - dt-bindings: usb: qcom,snps-dwc3: Fix bindings for X1E80100 + - serial: 8250_dw: handle reset control deassert error + - serial: 8250_exar: add support for Advantech 2 port card with Device ID + 0x0018 + - serial: 8250_mtk: Enable baud clock and manage in runtime PM + - serial: sc16is7xx: remove useless enable of enhanced features + - staging: gpib: Fix device reference leak in fmh_gpib driver + - staging: gpib: Fix no EOI on 1 and 2 byte writes + - staging: gpib: Return -EINTR on device clear + - staging: gpib: Fix sending clear and trigger events + - mm/migrate: remove MIGRATEPAGE_UNMAP + - treewide: remove MIGRATEPAGE_SUCCESS + - vmw_balloon: indicate success when effectively deflating during + migration + - xfs: always warn about deprecated mount options + - gpio: regmap: Allow to allocate regmap-irq device + - gpio: regmap: add the .fixed_direction_output configuration parameter + - gpio: idio-16: Define fixed direction of the GPIO lines + - Linux 6.17.6 + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40084 + - ksmbd: transport_ipc: validate payload size before reading handle + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40222 + - tty: serial: sh-sci: fix RSCI FIFO overrun handling + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40223 + - most: usb: Fix use-after-free in hdm_disconnect + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40106 + - comedi: fix divide-by-zero in comedi_buf_munge() + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40224 + - hwmon: (cgbc-hwmon) Add missing NULL check after devm_kzalloc() + * Questing update: v6.17.6 upstream stable 
release (LP: #2134982) // + CVE-2025-40225 + - drm/panthor: Fix kernel panic on partial unmap of a GPU VA region + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40226 + - firmware: arm_scmi: Account for failed debug initialization + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40227 + - mm/damon/sysfs: dealloc commit test ctx always + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40228 + - mm/damon/sysfs: catch commit test ctx alloc failure + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40229 + - mm/damon/core: fix potential memory leak by cleaning ops_filter in + damon_destroy_scheme + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40230 + - mm: prevent poison consumption when splitting THP + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40231 + - vsock: fix lock inversion in vsock_assign_transport() + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40233 + - ocfs2: clear extent cache after moving/defragmenting extents + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40235 + - btrfs: directly free partially initialized fs_info in + btrfs_check_leaked_roots() + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40236 + - virtio-net: zero unused hash fields + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40237 + - fs/notify: call exportfs_encode_fid with s_umount + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40238 + - net/mlx5: Fix IPsec cleanup over MPV device + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40239 + - net: phy: micrel: always set shared->phydev for LAN8814 + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40240 + - sctp: avoid NULL dereference when chunk data buffer is missing + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40241 + - erofs: fix crafted invalid cases for encoded extents + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40242 + - gfs2: Fix unlikely race in gdlm_put_lock + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40243 + - hfs: fix KMSAN uninit-value issue in hfs_find_set_zero_bits() + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40244 + - hfsplus: fix KMSAN uninit-value issue in __hfsplus_ext_cache_extent() + * Questing update: v6.17.6 upstream stable release (LP: #2134982) // + CVE-2025-40245 + - nios2: ensure that memblock.current_limit is set when setting pfn limits + * Questing update: v6.17.5 upstream stable release (LP: #2133557) + - docs: kdoc: handle the obsolescensce of docutils.ErrorString() + - Revert "fs: make vfs_fileattr_[get|set] return -EOPNOTSUPP" + - PCI: vmd: Override irq_startup()/irq_shutdown() in + vmd_init_dev_msi_info() + - ata: libata-core: relax checks in ata_read_log_directory() + - arm64/sysreg: Fix GIC CDEOI instruction encoding + - ixgbevf: fix getting link speed data for E610 devices + - rust: cfi: only 64-bit arm and x86 support CFI_CLANG + - x86/CPU/AMD: Prevent reset reasons from being retained across reboot + - slab: reset slab->obj_ext when freeing and it is OBJEXTS_ALLOC_FAIL + - Revert "io_uring/rw: drop -EOPNOTSUPP check in + __io_complete_rw_common()" + - 
io_uring: protect mem region deregistration + - Revert "drm/amd/display: Only restore backlight after amdgpu_dm_init or + dm_resume" + - r8152: add error handling in rtl8152_driver_init + - net: usb: lan78xx: Fix lost EEPROM write timeout error(-ETIMEDOUT) in + lan78xx_write_raw_eeprom + - f2fs: fix wrong block mapping for multi-devices + - gve: Check valid ts bit on RX descriptor before hw timestamping + - jbd2: ensure that all ongoing I/O complete before freeing blocks + - ext4: wait for ongoing I/O to complete before freeing blocks + - btrfs: fix clearing of BTRFS_FS_RELOC_RUNNING if relocation already + running + - btrfs: fix memory leak on duplicated memory in the qgroup assign ioctl + - btrfs: only set the device specific options after devices are opened + - btrfs: fix incorrect readahead expansion length + - can: gs_usb: gs_make_candev(): populate net_device->dev_port + - can: gs_usb: increase max interface to U8_MAX + - cxl/acpi: Fix setup of memory resource in cxl_acpi_set_cache_size() + - ALSA: hda/intel: Add MSI X870E Tomahawk to denylist + - ALSA: hda/realtek: Add quirk entry for HP ZBook 17 G6 + - drm/amdgpu: use atomic functions with memory barriers for vm fault info + - drm/amdgpu: fix gfx12 mes packet status return check + - drm/xe: Increase global invalidation timeout to 1000us + - perf/core: Fix address filter match with backing files + - perf/core: Fix MMAP event path names with backing files + - perf/core: Fix MMAP2 event device with backing files + - drm/amd: Check whether secure display TA loaded successfully + - PM: hibernate: Add pm_hibernation_mode_is_suspend() + - drm/amd: Fix hybrid sleep + - usb: gadget: Store endpoint pointer in usb_request + - usb: gadget: Introduce free_usb_request helper + - HID: multitouch: fix sticky fingers + - dax: skip read lock assertion for read-only filesystems + - coredump: fix core_pattern input validation + - can: m_can: m_can_plat_remove(): add missing pm_runtime_disable() + - can: m_can: m_can_handle_state_errors(): fix CAN state transition to + Error Active + - can: m_can: m_can_chip_config(): bring up interface in correct state + - can: m_can: fix CAN state in system PM + - net: mtk: wed: add dma mask limitation and GFP_DMA32 for device with + more than 4GB DRAM + - net: dlink: handle dma_map_single() failure properly + - doc: fix seg6_flowlabel path + - can: j1939: add missing calls in NETDEV_UNREGISTER notification handler + - dpll: zl3073x: Refactor DPLL initialization + - dpll: zl3073x: Handle missing or corrupted flash configuration + - r8169: fix packet truncation after S4 resume on RTL8168H/RTL8111H + - net: phy: bcm54811: Fix GMII/MII/MII-Lite selection + - net: phy: realtek: Avoid PHYCR2 access if PHYCR2 not present + - amd-xgbe: Avoid spurious link down messages during interface toggle + - Octeontx2-af: Fix missing error code in cgx_probe() + - tcp: fix tcp_tso_should_defer() vs large RTT + - net: airoha: Take into account out-of-order tx completions in + airoha_dev_xmit() + - selftests: net: check jq command is supported + - net: core: fix lockdep splat on device unregister + - ksmbd: fix recursive locking in RPC handle list access + - tg3: prevent use of uninitialized remote_adv and local_adv variables + - tls: trim encrypted message to match the plaintext on short splice + - tls: wait for async encrypt in case of error during latter iterations of + sendmsg + - tls: always set record_type in tls_process_cmsg + - tls: don't rely on tx_work during send() + - netdevsim: set the carrier when the device goes up + - net: 
usb: lan78xx: fix use of improperly initialized dev->chipid in + lan78xx_reset + - drm/panthor: Ensure MCU is disabled on suspend + - nvme-multipath: Skip nr_active increments in RETRY disposition + - riscv: kprobes: Fix probe address validation + - drm/bridge: lt9211: Drop check for last nibble of version register + - powerpc/fadump: skip parameter area allocation when fadump is disabled + - ASoC: codecs: Fix gain setting ranges for Renesas IDT821034 codec + - ASoC: nau8821: Cancel jdet_work before handling jack ejection + - ASoC: nau8821: Generalize helper to clear IRQ status + - ASoC: nau8821: Consistently clear interrupts before unmasking + - ASoC: nau8821: Add DMI quirk to bypass jack debounce circuit + - drm/i915/guc: Skip communication warning on reset in progress + - drm/i915/frontbuffer: Move bo refcounting + intel_frontbuffer_{get,release}() + - drm/i915/fb: Fix the set_tiling vs. addfb race, again + - drm/amdgpu: add ip offset support for cyan skillfish + - drm/amdgpu: add support for cyan skillfish without IP discovery + - drm/amdgpu: fix handling of harvesting for ip_discovery firmware + - drm/amdgpu: handle wrap around in reemit handling + - drm/amdgpu: set an error on all fences from a bad context + - drm/amdgpu: drop unused structures in amdgpu_drm.h + - drm/amd/powerplay: Fix CIK shutdown temperature + - drm/xe: Enable media sampler power gating + - drm/draw: fix color truncation in drm_draw_fill24 + - drm/rockchip: vop2: use correct destination rectangle height check + - HID: intel-thc-hid: Intel-quickspi: switch first interrupt from level to + edge detection + - sched/fair: Fix pelt lost idle time detection + - ALSA: firewire: amdtp-stream: fix enum kernel-doc warnings + - accel/qaic: Synchronize access to DBC request queue head & tail pointer + - nvme-auth: update sc_c in host response + - cxl/trace: Subtract to find an hpa_alias0 in cxl_poison events + - selftests/bpf: make arg_parsing.c more robust to crashes + - blk-mq: fix stale tag depth for shared sched tags in + blk_mq_update_nr_requests() + - block: Remove elevator_lock usage from blkg_conf frozen operations + - HID: hid-input: only ignore 0 battery events for digitizers + - HID: multitouch: fix name of Stylus input devices + - drm/xe/evict: drop bogus assert + - selftests: arg_parsing: Ensure data is flushed to disk before reading. 
+ - nvme/tcp: handle tls partially sent records in write_space() + - rust: cpufreq: fix formatting + - arm64: debug: always unmask interrupts in el0_softstp() + - arm64: cputype: Add Neoverse-V3AE definitions + - arm64: errata: Apply workarounds for Neoverse-V3AE + - xfs: rename the old_crc variable in xlog_recover_process + - xfs: fix log CRC mismatches between i386 and other architectures + - NFSD: Rework encoding and decoding of nfsd4_deviceid + - NFSD: Minor cleanup in layoutcommit processing + - NFSD: Implement large extent array support in pNFS + - NFSD: Fix last write offset handling in layoutcommit + - phy: cdns-dphy: Store hs_clk_rate and return it + - phy: cadence: cdns-dphy: Fix PLL lock and O_CMN_READY polling + - x86/resctrl: Refactor resctrl_arch_rmid_read() + - x86/resctrl: Fix miscount of bandwidth event when reactivating + previously unavailable RMID + - cxl: Fix match_region_by_range() to use region_res_match_cxl_range() + - phy: cadence: cdns-dphy: Update calibration wait time for startup state + machine + - drm/xe: Use devm_ioremap_wc for VRAM mapping and drop manual unmap + - drm/xe: Use dynamic allocation for tile and device VRAM region + structures + - drm/xe: Move struct xe_vram_region to a dedicated header + - drm/xe: Unify the initialization of VRAM regions + - drm/xe: Move rebar to be done earlier + - PM: hibernate: Fix pm_hibernation_mode_is_suspend() build breakage + - drm/xe: Fix an IS_ERR() vs NULL bug in xe_tile_alloc_vram() + - Linux 6.17.5 + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40086 + - drm/xe: Don't allow evicting of BOs in same VM in array of VM binds + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40087 + - NFSD: Define a proc_layoutcommit for the FlexFiles layout type + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40088 + - hfsplus: fix slab-out-of-bounds read in hfsplus_strcasecmp() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40162 + - ASoC: amd/sdw_utils: avoid NULL deref when devm_kasprintf() fails + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40085 + - ALSA: usb-audio: Fix NULL pointer deference in try_to_register_card + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40172 + - accel/qaic: Treat remaining == 0 as error in find_and_map_user_pages() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40177 + - accel/qaic: Fix bootlog initialization ordering + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40163 + - sched/deadline: Stop dl_server before CPU goes offline + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40174 + - x86/mm: Fix SMP ordering in switch_mm_irqs_off() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40089 + - cxl/features: Add check for no entries in cxl_feature_info + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40176 + - tls: wait for pending async decryptions if tls_strp_msg_hold fails + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40164 + - usbnet: Fix using smp_processor_id() in preemptible code warnings + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40091 + - ixgbe: fix too early devlink_free() in ixgbe_remove() + * Questing update: v6.17.5 upstream stable release (LP: 
#2133557) // + CVE-2025-40175 + - idpf: cleanup remaining SKBs in PTP flows + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40173 + - net/ip6_tunnel: Prevent perpetual tunnel growth + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40092 + - usb: gadget: f_ncm: Refactor bind path to use __free() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40093 + - usb: gadget: f_ecm: Refactor bind path to use __free() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40094 + - usb: gadget: f_acm: Refactor bind path to use __free() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40095 + - usb: gadget: f_rndis: Refactor bind path to use __free() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40165 + - media: nxp: imx8-isi: m2m: Fix streaming cleanup on release + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40096 + - drm/sched: Fix potential double free in + drm_sched_job_add_resv_dependencies + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40097 + - ALSA: hda: Fix missing pointer check in hda_component_manager_init + function + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40098 + - ALSA: hda: cs35l41: Fix NULL pointer dereference in + cs35l41_get_acpi_mute_state() + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40099 + - cifs: parse_dfs_referrals: prevent oob on malformed input + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40100 + - btrfs: do not assert we found block group item when creating free space + tree + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40101 + - btrfs: fix memory leaks when rejecting a non SINGLE data profile without + an RST + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40167 + - ext4: detect invalid INLINE_DATA + EXTENTS flag combination + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40102 + - KVM: arm64: Prevent access to vCPU events before init + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40103 + - smb: client: Fix refcount leak for cifs_sb_tlink + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40104 + - ixgbevf: fix mailbox API compatibility by negotiating supported features + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40166 + - drm/xe/guc: Check GuC running state before deregistering exec queue + * Questing update: v6.17.5 upstream stable release (LP: #2133557) // + CVE-2025-40105 + - vfs: Don't leak disconnected dentries on umount + * The machine didn’t go into suspend and got stuck (LP: #2132095) + - platform/x86: alienware-wmi-wmax: Fix NULL pointer dereference in sleep + handlers + * CAP_PERFMON insufficient to get perf data (LP: #2131046) + - SAUCE: perf/core: Allow CAP_PERFMON for paranoid level 4 + * Poweroff not working consistently after upgrading kernel 6.14.0-17.17 or + later (LP: #2115860) + - drm/amd: Unify shutdown() callback behavior + - drm/amd: Stop exporting amdgpu_device_ip_suspend() outside amdgpu_device + - drm/amd: Remove comment about handling errors in + amdgpu_device_ip_suspend_phase1() + - drm/amd: Don't always set IP block HW status to false + - drm/amd: Pass IP suspend 
errors up to callers + - drm/amd: Avoid evicting resources at S5 + * Re-enable INTEL_SKL_INT3472 for kernels >= 6.16 for Intel IPU camera + (LP: #2128792) + - Revert "UBUNTU: [Config] FTBFS: disable INTEL_SKL_INT3472" + - Revert "UBUNTU: SAUCE: platform/x86: int3472: Add handshake GPIO + function" + * Support Samsung S5K3J1 sensor for Intel MIPI camera (LP: #2121852) + - SAUCE: media: ipu-bridge: Support s5k3j1 sensor + * Questing update: v6.17.4 upstream stable release (LP: #2131259) + - fs: always return zero on success from replace_fd() + - fscontext: do not consume log entries when returning -EMSGSIZE + - btrfs: fix the incorrect max_bytes value for find_lock_delalloc_range() + - arm64: map [_text, _stext) virtual address range non-executable+read- + only + - rseq: Protect event mask against membarrier IPI + - statmount: don't call path_put() under namespace semaphore + - listmount: don't call path_put() under namespace semaphore + - clocksource/drivers/clps711x: Fix resource leaks in error paths + - memcg: skip cgroup_file_notify if spinning is not allowed + - page_pool: Fix PP_MAGIC_MASK to avoid crashing on some 32-bit arches + - PM: runtime: Update kerneldoc return codes + - dma-mapping: fix direction in dma_alloc direction traces + - cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency + - nfsd: unregister with rpcbind when deleting a transport + - KVM: x86: Add helper to retrieve current value of user return MSR + - KVM: SVM: Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 + - iio: frequency: adf4350: Fix ADF4350_REG3_12BIT_CLKDIV_MODE + - media: v4l2-subdev: Fix alloc failure check in + v4l2_subdev_call_state_try() + - asm-generic/io.h: Skip trace helpers if rwmmio events are disabled + - clk: npcm: select CONFIG_AUXILIARY_BUS + - clk: thead: th1520-ap: describe gate clocks with clk_gate + - clk: thead: th1520-ap: fix parent of padctrl0 clock + - clk: thead: Correct parent for DPU pixel clocks + - clk: renesas: r9a08g045: Add MSTOP for GPIO + - perf disasm: Avoid undefined behavior in incrementing NULL + - perf test trace_btf_enum: Skip if permissions are insufficient + - perf evsel: Avoid container_of on a NULL leader + - libperf event: Ensure tracing data is multiple of 8 sized + - clk: qcom: common: Fix NULL vs IS_ERR() check in qcom_cc_icc_register() + - clk: qcom: Select the intended config in QCS_DISPCC_615 + - perf parse-events: Handle fake PMUs in CPU terms + - clk: at91: peripheral: fix return value + - clk: renesas: cpg-mssr: Fix memory leak in cpg_mssr_reserved_init() + - perf: Completely remove possibility to override MAX_NR_CPUS + - perf drm_pmu: Fix fd_dir leaks in for_each_drm_fdinfo_in_dir() + - perf util: Fix compression checks returning -1 as bool + - rtc: x1205: Fix Xicor X1205 vendor prefix + - rtc: optee: fix memory leak on driver removal + - perf arm_spe: Correct setting remote access + - perf arm_spe: Correct memory level for remote access + - perf vendor events arm64 AmpereOneX: Fix typo - should be + l1d_cache_access_prefetches + - perf test: AMD IBS swfilt skip kernel tests if paranoia is >1 + - perf test shell lbr: Avoid failures with perf event paranoia + - perf trace: Fix IS_ERR() vs NULL check bug + - perf session: Fix handling when buffer exceeds 2 GiB + - perf test: Don't leak workload gopipe in PERF_RECORD_* + - perf evsel: Fix uniquification when PMU given without suffix + - perf test: Avoid uncore_imc/clockticks in uniquification test + - perf evsel: Ensure the fallback message is always written to + - perf build-id: Ensure 
snprintf string is empty when size is 0 + - clk: mediatek: mt8195-infra_ao: Fix parent for infra_ao_hdmi_26m + - clk: mediatek: clk-mux: Do not pass flags to + clk_mux_determine_rate_flags() + - clk: nxp: lpc18xx-cgu: convert from round_rate() to determine_rate() + - clk: nxp: Fix pll0 rate check condition in LPC18xx CGU driver + - clk: tegra: do not overallocate memory for bpmp clocks + - nfsd: fix assignment of ia_ctime.tv_nsec on delegated mtime update + - nfsd: ignore ATTR_DELEG when checking ia_valid before notify_change() + - vfs: add ATTR_CTIME_SET flag + - nfsd: use ATTR_CTIME_SET for delegated ctime updates + - nfsd: track original timestamps in nfs4_delegation + - nfsd: fix SETATTR updates for delegated timestamps + - nfsd: fix timestamp updates in CB_GETATTR + - tracing: Fix the bug where bpf_get_stackid returns -EFAULT on the ARM64 + - PM: core: Annotate loops walking device links as _srcu + - PM: core: Add two macros for walking device links + - PM: sleep: Do not wait on SYNC_STATE_ONLY device links + - cpufreq: tegra186: Set target frequency for all cpus in policy + - scsi: mvsas: Fix use-after-free bugs in mvs_work_queue + - perf bpf-filter: Fix opts declaration on older libbpfs + - scsi: ufs: sysfs: Make HID attributes visible + - mshv: Handle NEED_RESCHED_LAZY before transferring to guest + - perf bpf_counter: Fix handling of cpumap fixing hybrid + - ASoC: SOF: ipc4-topology: Correct the minimum host DMA buffer size + - ASoC: SOF: ipc4-topology: Account for different ChainDMA host buffer + size + - ASoC: SOF: Intel: hda-pcm: Place the constraint on period time instead + of buffer time + - LoongArch: Add cflag -fno-isolate-erroneous-paths-dereference + - LoongArch: Fix build error for LTO with LLVM-18 + - LoongArch: Init acpi_gbl_use_global_lock to false + - ASoC: SOF: Intel: Read the LLP via the associated Link DMA channel + - net: usb: lan78xx: Fix lost EEPROM read timeout error(-ETIMEDOUT) in + lan78xx_read_raw_eeprom + - net/mlx4: prevent potential use after free in mlx4_en_do_uc_filter() + - drm/xe/hw_engine_group: Fix double write lock release in error path + - drm/xe/i2c: Don't rely on d3cold.allowed flag in system PM path + - s390/cio: Update purge function to unregister the unused subchannels + - drm/vmwgfx: Fix a null-ptr access in the cursor snooper + - drm/vmwgfx: Fix Use-after-free in validation + - drm/vmwgfx: Fix copy-paste typo in validation + - net/sctp: fix a null dereference in sctp_disposition + sctp_sf_do_5_1D_ce() + - tcp: Don't call reqsk_fastopen_remove() in tcp_conn_request(). 
+ - net: mscc: ocelot: Fix use-after-free caused by cyclic delayed work + - selftest: net: ovpn: Fix uninit return values + - ice: ice_adapter: release xa entry on adapter allocation failure + - net: fsl_pq_mdio: Fix device node reference leak in fsl_pq_mdio_probe + - tools build: Align warning options with perf + - perf python: split Clang options when invoking Popen + - tcp: take care of zero tp->window_clamp in tcp_set_rcvlowat() + - mailbox: zynqmp-ipi: Remove redundant mbox_controller_unregister() call + - mailbox: zynqmp-ipi: Remove dev.parent check in zynqmp_ipi_free_mboxes + - mailbox: zynqmp-ipi: Fix out-of-bounds access in mailbox cleanup loop + - mailbox: zynqmp-ipi: Fix SGI cleanup on unbind + - bpf: Fix metadata_dst leak __bpf_redirect_neigh_v{4,6} + - net: mdio: mdio-i2c: Hold the i2c bus lock during smbus transactions + - net: sparx5/lan969x: fix flooding configuration on bridge join/leave + - net/mlx5: Prevent tunnel mode conflicts between FDB and NIC IPsec tables + - net/mlx5e: Prevent tunnel reformat when tunnel mode not allowed + - mailbox: mtk-cmdq: Remove pm_runtime APIs from cmdq_mbox_send_data() + - drm/amdgpu: Add additional DCE6 SCL registers + - drm/amd/display: Add missing DCE6 SCL_HORZ_FILTER_INIT* SRIs + - drm/amd/display: Properly clear SCL_*_FILTER_CONTROL on DCE6 + - drm/amd/display: Properly disable scaling on DCE6 + - drm/amd/display: Disable scaling on DCE6 for now + - drm/amdkfd: Fix kfd process ref leaking when userptr unmapping + - net: pse-pd: tps23881: Fix current measurement scaling + - crypto: skcipher - Fix reqsize handling + - netfilter: nft_objref: validate objref and objrefmap expressions + - bridge: br_vlan_fill_forward_path_pvid: use br_vlan_group_rcu() + - selftests: netfilter: nft_fib.sh: fix spurious test failures + - selftests: netfilter: query conntrack state to check for port clash + resolution + - io_uring/zcrx: increment fallback loop src offset + - net: airoha: Fix loopback mode configuration for GDM2 port + - cifs: Fix copy_to_iter return value check + - smb: client: fix missing timestamp updates after utime(2) + - rtc: isl12022: Fix initial enable_irq/disable_irq balance + - cifs: Query EA $LXMOD in cifs_query_path_info() for WSL reparse points + - tpm_tis: Fix incorrect arguments in tpm_tis_probe_irq_single + - gpio: wcd934x: mark the GPIO controller as sleeping + - bpf: Avoid RCU context warning when unpinning htab with internal structs + - kbuild: always create intermediate vmlinux.unstripped + - kbuild: keep .modinfo section in vmlinux.unstripped + - kbuild: Restore pattern to avoid stripping .rela.dyn from vmlinux + - kbuild: Add '.rel.*' strip pattern for vmlinux + - s390: vmlinux.lds.S: Reorder sections + - s390/vmlinux.lds.S: Move .vmlinux.info to end of allocatable sections + - ACPICA: acpidump: drop ACPI_NONSTRING attribute from file_name + - ACPI: property: Fix buffer properties extraction for subnodes + - ACPI: TAD: Add missing sysfs_remove_group() for ACPI_TAD_RT + - ACPICA: Debugger: drop ACPI_NONSTRING attribute from name_seg + - ACPI: debug: fix signedness issues in read/write helpers + - ACPI: battery: Add synchronization between interface updates + - arm64: dts: qcom: msm8916: Add missing MDSS reset + - arm64: dts: qcom: msm8939: Add missing MDSS reset + - arm64: dts: qcom: sdm845: Fix slimbam num-channels/ees + - Revert "UBUNTU: SAUCE: arm64: dts: qcom: x1e80100-pmics: Disable pm8010 + by default" + - arm64: dts: qcom: x1e80100-pmics: Disable pm8010 by default + - arm64: dts: ti: k3-am62a-main: Fix main padcfg 
length + - arm64: dts: ti: k3-am62p: Fix supported hardware for 1GHz OPP + - arm64: kprobes: call set_memory_rox() for kprobe page + - arm64: mte: Do not flag the zero page as PG_mte_tagged + - ARM: AM33xx: Implement TI advisory 1.0.36 (EMU0/EMU1 pins state on + reset) + - ARM: OMAP2+: pm33xx-core: ix device node reference leaks in + amx3_idle_init + - firmware: arm_scmi: quirk: Prevent writes to string constants + - perf/arm-cmn: Fix CMN S3 DTM offset + - KVM: s390: Fix to clear PTE when discarding a swapped page + - KVM: arm64: Fix debug checking for np-guests using huge mappings + - KVM: arm64: Fix page leak in user_mem_abort() + - x86/kvm: Force legacy PCI hole to UC when overriding MTRRs for TDX/SNP + - KVM: SVM: Re-load current, not host, TSC_AUX on #VMEXIT from SEV-ES + guest + - KVM: TDX: Fix uninitialized error code for __tdx_bringup() + - dt-bindings: phy: rockchip-inno-csi-dphy: make power-domains non- + required + - xen: take system_transition_mutex on suspend + - xen/events: Cleanup find_virq() return codes + - xen/manage: Fix suspend error path + - xen/events: Return -EEXIST for bound VIRQs + - xen/events: Update virq_to_irq on migration + - firmware: exynos-acpm: fix PMIC returned errno + - firmware: meson_sm: fix device leak at probe + - media: cec: extron-da-hd-4k-plus: drop external-module make commands + - media: cx18: Add missing check after DMA map + - media: i2c: mt9p031: fix mbus code initialization + - media: i2c: mt9v111: fix incorrect type for ret + - media: mc: Fix MUST_CONNECT handling for pads with no links + - media: pci: ivtv: Add missing check after DMA map + - media: pci: mg4b: fix uninitialized iio scan data + - media: platform: mtk-mdp3: Add missing MT8188 compatible to comp_dt_ids + - media: s5p-mfc: remove an unused/uninitialized variable + - media: staging/ipu7: fix isys device runtime PM usage in firmware + closing + - media: uvcvideo: Avoid variable shadowing in uvc_ctrl_cleanup_fh + - media: venus: firmware: Use correct reset sequence for IRIS2 + - media: venus: pm_helpers: add fallback for the opp-table + - media: vivid: fix disappearing messages + - media: vsp1: Export missing vsp1_isp_free_buffer symbol + - media: ti: j721e-csi2rx: Use devm_of_platform_populate + - media: ti: j721e-csi2rx: Fix source subdev link creation + - media: lirc: Fix error handling in lirc_register() + - drm/exynos: exynos7_drm_decon: remove ctx->suspended + - drm/panthor: Fix memory leak in panthor_ioctl_group_create() + - drm/msm/a6xx: Fix PDC sleep sequence + - drm/rcar-du: dsi: Fix 1/2/3 lane support + - drm/nouveau: fix bad ret code in nouveau_bo_move_prep + - drm/xe/uapi: loosen used tracking restriction + - drm/amd/display: Incorrect Mirror Cositing + - drm/amd/display: Enable Dynamic DTBCLK Switch + - drm/amd/display: Fix unsafe uses of kernel mode FPU + - blk-crypto: fix missing blktrace bio split events + - btrfs: avoid potential out-of-bounds in btrfs_encode_fh() + - bus: mhi: ep: Fix chained transfer handling in read path + - bus: mhi: host: Do not use uninitialized 'dev' pointer in + mhi_init_irq_setup() + - cdx: Fix device node reference leak in cdx_msi_domain_init + - clk: qcom: tcsrcc-x1e80100: Set the bi_tcxo as parent to eDP refclk + - clk: samsung: exynos990: Use PLL_CON0 for PLL parent muxes + - clk: samsung: exynos990: Fix CMU_TOP mux/div bit widths + - clk: samsung: exynos990: Replace bogus divs with fixed-factor clocks + - copy_sighand: Handle architectures where sizeof(unsigned long) < + sizeof(u64) + - cpufreq: CPPC: Avoid using CPUFREQ_ETERNAL as 
transition delay + - cpufreq: intel_pstate: Fix object lifecycle issue in + update_qos_request() + - crypto: aspeed - Fix dma_unmap_sg() direction + - crypto: atmel - Fix dma_unmap_sg() direction + - crypto: rockchip - Fix dma_unmap_sg() nents value + - eventpoll: Replace rwlock with spinlock + - fbdev: Fix logic error in "offb" name match + - fs/ntfs3: Fix a resource leak bug in wnd_extend() + - fs: quota: create dedicated workqueue for quota_release_work + - fsnotify: pass correct offset to fsnotify_mmap_perm() + - fuse: fix possibly missing fuse_copy_finish() call in fuse_notify() + - fuse: fix livelock in synchronous file put from fuseblk workers + - gpio: mpfs: fix setting gpio direction to output + - i3c: Fix default I2C adapter timeout value + - iio/adc/pac1934: fix channel disable configuration + - iio: dac: ad5360: use int type to store negative error codes + - iio: dac: ad5421: use int type to store negative error codes + - iio: frequency: adf4350: Fix prescaler usage. + - iio: xilinx-ams: Fix AMS_ALARM_THR_DIRECT_MASK + - iio: xilinx-ams: Unmask interrupts after updating alarms + - init: handle bootloader identifier in kernel parameters + - iio: imu: inv_icm42600: Simplify pm_runtime setup + - iio: imu: inv_icm42600: Drop redundant pm_runtime reinitialization in + resume + - iio: imu: inv_icm42600: Avoid configuring if already pm_runtime + suspended + - iommu/vt-d: PRS isn't usable if PDS isn't supported + - ipmi: Rework user message limit handling + - ipmi:msghandler:Change seq_lock to a mutex + - kernel/sys.c: fix the racy usage of task_lock(tsk->group_leader) in + sys_prlimit64() paths + - KEYS: trusted_tpm1: Compare HMAC values in constant time + - kho: only fill kimage if KHO is finalized + - lib/genalloc: fix device leak in of_gen_pool_get() + - loop: fix backing file reference leak on validation error + - md: fix mssing blktrace bio split events + - of: unittest: Fix device reference count leak in + of_unittest_pci_node_verify + - openat2: don't trigger automounts with RESOLVE_NO_XDEV + - padata: Reset next CPU when reorder sequence wraps around + - parisc: don't reference obsolete termio struct for TC* constants + - parisc: Remove spurious if statement from raw_copy_from_user() + - nvme-pci: Add TUXEDO IBS Gen8 to Samsung sleep quirk + - pinctrl: samsung: Drop unused S3C24xx driver data + - PM: EM: Fix late boot with holes in CPU topology + - PM: hibernate: Fix hybrid-sleep + - PM: hibernate: Restrict GFP mask in power_down() + - power: supply: max77976_charger: fix constant current reporting + - powerpc/powernv/pci: Fix underflow and leak issue + - powerpc/pseries/msi: Fix potential underflow and leak issue + - pwm: berlin: Fix wrong register in suspend/resume + - pwm: Fix incorrect variable used in error message + - Revert "ipmi: fix msg stack when IPMI is disconnected" + - sched/deadline: Fix race in push_dl_task() + - scsi: hpsa: Fix potential memory leak in hpsa_big_passthru_ioctl() + - scsi: sd: Fix build warning in sd_revalidate_disk() + - sctp: Fix MAC comparison to be constant-time + - smb client: fix bug with newly created file in cached dir + - sparc64: fix hugetlb for sun4u + - sparc: fix error handling in scan_one_device() + - xtensa: simdisk: add input size check in proc_write_simdisk + - xsk: Harden userspace-supplied xdp_desc validation + - mtd: rawnand: fsmc: Default to autodetect buswidth + - mtd: nand: raw: gpmi: fix clocks when CONFIG_PM=N + - mmc: core: SPI mode remove cmd7 + - mmc: mmc_spi: multiple block read remove read crc ack + - memory: 
samsung: exynos-srom: Fix of_iomap leak in exynos_srom_probe + - memory: stm32_omm: Fix req2ack update test + - rtc: interface: Ensure alarm irq is enabled when UIE is enabled + - rtc: interface: Fix long-standing race when setting alarm + - rseq/selftests: Use weak symbol reference, not definition, to link with + glibc + - PCI: xilinx-nwl: Fix ECAM programming + - PCI: tegra: Convert struct tegra_msi mask_lock into raw spinlock + - PCI/sysfs: Ensure devices are powered for config reads + - PCI/IOV: Add PCI rescan-remove locking when enabling/disabling SR-IOV + - PCI/ERR: Fix uevent on failure to recover + - PCI/AER: Fix missing uevent on recovery when a reset is requested + - PCI/AER: Support errors introduced by PCIe r6.0 + - PCI: Ensure relaxed tail alignment does not increase min_align + - PCI: Fix failure detection during resource resize + - PCI: j721e: Fix module autoloading + - PCI: j721e: Fix programming sequence of "strap" settings + - PCI: keystone: Use devm_request_irq() to free "ks-pcie-error-irq" on + exit + - PCI: rcar-gen4: Fix PHY initialization + - PCI: rcar-host: Drop PMSR spinlock + - PCI: rcar-host: Convert struct rcar_msi mask_lock into raw spinlock + - PCI: tegra194: Fix broken tegra_pcie_ep_raise_msi_irq() + - PCI: tegra194: Handle errors in BPMP response + - PCI: tegra194: Reset BARs when running in PCIe endpoint mode + - PCI/pwrctrl: Fix device leak at registration + - PCI/pwrctrl: Fix device and OF node leak at bus scan + - PCI/pwrctrl: Fix device leak at device stop + - spi: cadence-quadspi: Flush posted register writes before INDAC access + - spi: cadence-quadspi: Flush posted register writes before DAC access + - spi: cadence-quadspi: Fix cqspi_setup_flash() + - xfs: use deferred intent items for reaping crosslinked blocks + - x86/fred: Remove ENDBR64 from FRED entry points + - x86/umip: Check that the instruction opcode is at least two bytes + - x86/umip: Fix decoding of register forms of 0F 01 (SGDT and SIDT + aliases) + - mptcp: pm: in-kernel: usable client side with C-flag + - mptcp: reset blackhole on success with non-loopback ifaces + - selftests: mptcp: join: validate C-flag + def limit + - s390/cio/ioasm: Fix __xsch() condition code handling + - s390/dasd: enforce dma_alignment to ensure proper buffer validation + - s390/dasd: Return BLK_STS_INVAL for EINVAL from do_dasd_request + - s390: Add -Wno-pointer-sign to KBUILD_CFLAGS_DECOMPRESSOR + - slab: prevent warnings when slab obj_exts vector allocation fails + - slab: mark slab->obj_exts allocation failures unconditionally + - wifi: ath11k: HAL SRNG: don't deinitialize and re-initialize again + - wifi: iwlwifi: Fix dentry reference leak in iwl_mld_add_link_debugfs + - wifi: rtw89: avoid possible TX wait initialization race + - wifi: mt76: mt7925u: Add VID/PID for Netgear A9000 + - wifi: mt76: mt7921u: Add VID/PID for Netgear A7500 + - mm/thp: fix MTE tag mismatch when replacing zero-filled subpages + - mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled + mTHP subpage to shared zeropage + - mm/page_alloc: only set ALLOC_HIGHATOMIC for __GPF_HIGH allocations + - mm/hugetlb: early exit from hugetlb_pages_alloc_boot() when + max_huge_pages=0 + - mm/damon/vaddr: do not repeat pte_offset_map_lock() until success + - mm/damon/lru_sort: use param_ctx for damon_attrs staging + - nfsd: decouple the xprtsec policy check from check_nfsd_access() + - NFSD: Fix destination buffer size in nfsd4_ssc_setup_dul() + - nfsd: nfserr_jukebox in nlm_fopen should lead to a retry + - media: iris: Call 
correct power off callback in cleanup path + - media: iris: Fix firmware reference leak and unmap memory after load + - media: iris: fix module removal if firmware download failed + - media: iris: vpu3x: Add MNoC low power handshake during hardware power- + off + - media: iris: Fix port streaming handling + - media: iris: Fix buffer count reporting in internal buffer check + - media: iris: Allow substate transition to load resources during output + streaming + - media: iris: Always destroy internal buffers on firmware release + response + - media: iris: Simplify session stop logic by relying on vb2 checks + - media: iris: Update vbuf flags before v4l2_m2m_buf_done + - media: iris: Send dummy buffer address for all codecs during drain + - media: iris: Fix missing LAST flag handling during drain + - media: iris: Fix format check for CAPTURE plane in try_fmt + - media: iris: Allow stop on firmware only if start was issued. + - ext4: add ext4_sb_bread_nofail() helper function for + ext4_free_branches() + - ext4: fail unaligned direct IO write with EINVAL + - ext4: verify orphan file size is not too big + - ext4: increase i_disksize to offset + len in + ext4_update_disksize_before_punch() + - ext4: correctly handle queries for metadata mappings + - ext4: avoid potential buffer over-read in parse_apply_sb_mount_options() + - ext4: fix an off-by-one issue during moving extents + - ext4: guard against EA inode refcount underflow in xattr update + - ext4: validate ea_ino and size in check_xattrs + - ACPICA: Allow to skip Global Lock initialization + - ext4: free orphan info with kvfree + - ipmi: Fix handling of messages with provided receive message pointer + - Squashfs: add additional inode sanity checking + - Squashfs: reject negative file sizes in squashfs_read_inode() + - mm/ksm: fix incorrect KSM counter handling in mm_struct during fork + - media: mc: Clear minor number before put device + - arm64: dts: qcom: qcs615: add missing dt property in QUP SEs + - ACPI: property: Disregard references in data-only subnode lists + - ACPI: property: Add code comments explaining what is going on + - ACPI: property: Do not pass NULL handles to acpi_attach_data() + - irqchip/sifive-plic: Avoid interrupt ID 0 handling during suspend/resume + - copy_file_range: limit size if in compat mode + - minixfs: Verify inode mode when loading from disk + - pid: Add a judgment for ns null in pid_nr_ns + - fs: Add 'initramfs_options' to set initramfs mount options + - cramfs: Verify inode mode when loading from disk + - nsfs: validate extensible ioctls + - mnt_ns_tree_remove(): DTRT if mnt_ns had never been added to mnt_ns_list + - writeback: Avoid softlockup when switching many inodes + - writeback: Avoid excessively long inode switching times + - iomap: error out on file IO when there is no inline_data buffer + - pidfs: validate extensible ioctls + - mount: handle NULL values in mnt_ns_release() + - Linux 6.17.4 + * Questing update: v6.17.4 upstream stable release (LP: #2131259) // Race + condition in perf build causes build failure due to missing unistd_64.h + header on arm64 (LP: #2131702) + - perf tools: Fix arm64 libjvmti build by generating unistd_64.h + * Questing update: v6.17.3 upstream stable release (LP: #2129610) + - arch: copy_thread: pass clone_flags as u64 + - filelock: add FL_RECLAIM to show_fl_flags() macro + - init: INITRAMFS_PRESERVE_MTIME should depend on BLK_DEV_INITRD + - pid: use ns_capable_noaudit() when determining net sysctl permissions + - Fix CC_HAS_ASM_GOTO_OUTPUT on non-x86 architectures + 
- [Config]: Update CC configs for v6.17.3 + - seccomp: Fix a race with WAIT_KILLABLE_RECV if the tracer replies too + fast + - kbuild: Add missing $(objtree) prefix to powerpc crtsavres.o artifact + - selftests: arm64: Check fread return value in exec_target + - selftests: arm64: Fix -Waddress warning in tpidr2 test + - kselftest/arm64/gcs: Correctly check return value when disabling GCS + - hfsplus: fix slab-out-of-bounds read in hfsplus_uni2asc() + - gfs2: Fix GLF_INVALIDATE_IN_PROGRESS flag clearing in do_xmote + - gfs2: Remove space before newline + - gfs2: Further sanitize lock_dlm.c + - gfs2: Fix LM_FLAG_TRY* logic in add_to_queue + - gfs2: Remove duplicate check in do_xmote + - gfs2: Get rid of GLF_INVALIDATE_IN_PROGRESS + - gfs2: do_xmote cleanup + - gfs2: Add proper lockspace locking + - powerpc/8xx: Remove left-over instruction and comments in + DataStoreTLBMiss handler + - powerpc/603: Really copy kernel PGD entries into all PGDIRs + - powerpc/ftrace: ensure ftrace record ops are always set for NOPs + - powerpc64/modules: correctly iterate over stubs in + setup_ftrace_ool_stubs + - uprobes: uprobe_warn should use passed task + - raid6: riscv: Clean up unused header file inclusion + - coresight: trbe: Prevent overflow in PERF_IDX2OFF() + - perf: arm_spe: Prevent overflow in PERF_IDX2OFF() + - erofs: avoid reading more for fragment maps + - smb: client: fix sending the iwrap custom IRD/ORD negotiation messages + - smb: server: fix IRD/ORD negotiation with the client + - perf/x86/intel: Use early_initcall() to hook bts_init() + - perf/x86/intel: Fix IA32_PMC_x_CFG_B MSRs access error + - x86/vdso: Fix output operand size of RDPID + - selftests: cgroup: Make test_pids backwards compatible + - sched/fair: Get rid of sched_domains_curr_level hack for tl->cpumask() + - [Config]: Update CONFIG_SCHED_MC for v6.17.3 + - lsm: CONFIG_LSM can depend on CONFIG_SECURITY + - cpuset: fix failure to enable isolated partition when containing + isolcpus + - btrfs: return any hit error from extent_writepage_io() + - btrfs: fix symbolic link reading when bs > ps + - pinctrl: renesas: rzg2l: Fix invalid unsigned return in rzg3s_oen_read() + - arm64: dts: renesas: rzg2lc-smarc: Disable CAN-FD channel0 + - bpf: Tidy verifier bug message + - regmap: Remove superfluous check for !config in __regmap_init() + - selftests/bpf: Copy test_kmods when installing selftest + - rust: cpumask: Mark CpumaskVar as transparent + - bpf/selftests: Fix test_tcpnotify_user + - bpf: Remove migrate_disable in kprobe_multi_link_prog_run + - libbpf: Fix reuse of DEVMAP + - tools/nolibc: fix error return value of clock_nanosleep() + - ARM: dts: renesas: porter: Fix CAN pin group + - leds: max77705: Function return instead of variable assignment + - leds: flash: leds-qcom-flash: Update torch current clamp setting + - s390/bpf: Do not write tail call counter into helper and kfunc frames + - s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL + - s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG + - cpufreq: scmi: Account for malformed DT in scmi_dev_used_by_cpus() + - arm64: dts: renesas: sparrow-hawk: Invert microSD voltage selector on + EVTB1 + - arm64: dts: renesas: sparrow-hawk: Set VDDQ18_25_AVB voltage on EVTB1 + - libbpf: Export bpf_object__prepare symbol + - firmware: arm_scmi: Mark VirtIO ready before registering + scmi_virtio_driver + - arm64: dts: imx93-kontron: Fix GPIO for panel regulator + - arm64: dts: imx93-kontron: Fix USB port assignment + - arm64: dts: imx95: Correct the lpuart7 and 
lpuart8 srcid + - bpf: Remove preempt_disable in bpf_try_get_buffers + - ACPI: processor: idle: Fix memory leak when register cpuidle device + failed + - genirq: Add irq_chip_(startup/shutdown)_parent() + - PCI/MSI: Add startup/shutdown for per device domains + - irqchip/sg2042-msi: Fix broken affinity setting + - scripts/misc-check: update export checks for EXPORT_SYMBOL_FOR_MODULES() + - soc: qcom: rpmh-rsc: Unconditionally clear _TRIGGER bit for TCS + - pinctrl: meson-gxl: add missing i2c_d pinmux + - blk-mq: check kobject state_in_sysfs before deleting in + blk_mq_unregister_hctx + - selftests/futex: Remove the -g parameter from futex_priv_hash + - ARM: at91: pm: fix MCKx restore routine + - arm64: dts: apple: t8103-j457: Fix PCIe ethernet iommu-map + - regulator: scmi: Use int type to store negative error codes + - selftests/futex: Fix some futex_numa_mpol subtests + - tools/nolibc: avoid error in dup2() if old fd equals new fd + - selftests/nolibc: fix EXPECT_NZ macro + - leds: leds-lp55xx: Use correct address for memory programming + - PCI/MSI: Check MSI_FLAG_PCI_MSI_MASK_PARENT in + cond_[startup|shutdown]_parent() + - block: use int to store blk_stack_limits() return value + - ARM: dts: stm32: stm32mp151c-plyaqm: Use correct dai-format property + - dt-bindings: vendor-prefixes: Add undocumented vendor prefixes + - genirq/test: Fix depth tests on architectures with NOREQUEST by default. + - genirq/test: Select IRQ_DOMAIN + - genirq/test: Depend on SPARSE_IRQ + - genirq/test: Drop CONFIG_GENERIC_IRQ_MIGRATION assumptions + - genirq/test: Ensure CPU 1 is online for hotplug test + - selftests/bpf: Fix count write in testapp_xdp_metadata_copy() + - vdso/datastore: Gate time data behind CONFIG_GENERIC_GETTIMEOFDAY + - PM: sleep: core: Clear power.must_resume in noirq suspend error path + - blk-mq: fix elevator depth_updated method + - vdso: Add struct __kernel_old_timeval forward declaration to gettime.h + - ARM: dts: ti: omap: am335x-baltos: Fix ti,en-ck32k-xtal property in DTS + to use correct boolean syntax + - ARM: dts: ti: omap: omap3-devkit8000-lcd: Fix ti,keep-vref-on property + to use correct boolean syntax in DTS + - ARM: dts: omap: am335x-cm-t335: Remove unused mcasp num-serializer + property + - PM / devfreq: mtk-cci: Fix potential error pointer dereference in + probe() + - power: supply: cw2015: Fix a alignment coding style issue + - hwmon: (asus-ec-sensors) Narrow lock for X870E-CREATOR WIFI + - pinctrl: renesas: Use int type to store negative error codes + - pinctrl: eswin: Fix regulator error check and Kconfig dependency + - null_blk: Fix the description of the cache_size module argument + - blk-throttle: fix access race during throttle policy activation + - selftests: vDSO: Fix -Wunitialized in powerpc VDSO_CALL() wrapper + - selftests: vDSO: vdso_test_abi: Correctly skip whole test with missing + vDSO + - irqchip/gic-v5: Fix loop in gicv5_its_create_itt_two_level() cleanup + path + - irqchip/gic-v5: Fix error handling in gicv5_its_irq_domain_alloc() + - tick: Do not set device to detached state in tick_shutdown() + - arm64: dts: mediatek: mt8195: Remove suspend-breaking reset from pcie0 + - arm64: dts: mediatek: mt8183: Fix out of range pull values + - nbd: restrict sockets to TCP and UDP + - PM / devfreq: rockchip-dfi: double count on RK3588 + - firmware: firmware: meson-sm: fix compile-test default + - dts: arm: amlogic: fix pwm node for c3 + - soc: mediatek: mtk-svs: fix device leaks on mt8183 probe failure + - soc: mediatek: mtk-svs: fix device leaks on mt8192 probe 
failure + - cpuidle: qcom-spm: fix device and OF node leaks at probe + - block: cleanup bio_issue + - block: initialize bio issue time in blk_mq_submit_bio() + - block: factor out a helper bio_submit_split_bioset() + - block: skip unnecessary checks for split bio + - block: fix ordering of recursive split IO + - blk-mq: remove useless checkings in blk_mq_update_nr_requests() + - blk-mq: check invalid nr_requests in queue_requests_store() + - blk-mq: convert to serialize updating nr_requests with + update_nr_hwq_lock + - blk-mq: cleanup shared tags case in blk_mq_update_nr_requests() + - blk-mq: split bitmap grow and resize case in blk_mq_update_nr_requests() + - blk-mq-sched: add new parameter nr_requests in blk_mq_alloc_sched_tags() + - blk-mq: fix potential deadlock while nr_requests grown + - arm64: dts: allwinner: a527: cubie-a5e: Add ethernet PHY reset setting + - arm64: dts: allwinner: t527: avaota-a1: Add ethernet PHY reset setting + - arm64: dts: rockchip: Add RTC on rk3576-evb1-v10 + - arm64: dts: rockchip: Add WiFi on rk3576-evb1-v10 + - arm64: dts: rockchip: Fix network on rk3576 evb1 board + - arm64: dts: ti: k3-j742s2-mcu-wakeup: Override firmware-name for MCU R5F + cores + - arm64: dts: ti: k3: Rename rproc reserved-mem nodes to 'memory@addr' + - Revert "arm64: dts: ti: k3-j721e-sk: Fix reversed C6x carveout + locations" + - Revert "arm64: dts: ti: k3-j721e-beagleboneai64: Fix reversed C6x + carveout locations" + - arm64: dts: mediatek: mt8188: Change efuse fallback compatible to mt8186 + - arm64: dts: mediatek: mt8186-tentacruel: Fix touchscreen model + - arm64: dts: ti: k3-pinctrl: Fix the bug in existing macros + - arm64: dts: renesas: r9a09g047e57-smarc: Fix gpio key's pin control node + - arm64: dts: mediatek: mt6331: Fix pmic, regulators, rtc, keys node names + - mmc: core: Fix variable shadowing in mmc_route_rpmb_frames() + - arm64: dts: mediatek: mt6795-xperia-m5: Fix mmc0 latch-ck value + - arm64: dts: mediatek: mt7986a: Fix PCI-Express T-PHY node address + - arm64: dts: mediatek: mt8395-kontron-i1200: Fix MT6360 regulator nodes + - arm64: dts: mediatek: mt8516-pumpkin: Fix machine compatible + - arm64: dts: allwinner: a527: cubie-a5e: Add LEDs + - arm64: dts: allwinner: a527: cubie-a5e: Drop external 32.768 KHz crystal + - arm64: dts: allwinner: t527: avaota-a1: hook up external 32k crystal + - arm64: dts: allwinner: t527: orangepi-4a: hook up external 32k crystal + - pwm: tiehrpwm: Don't drop runtime PM reference in .free() + - pwm: tiehrpwm: Make code comment in .free() more useful + - pwm: tiehrpwm: Fix various off-by-one errors in duty-cycle calculation + - pwm: tiehrpwm: Fix corner case in clock divisor calculation + - ACPICA: Apply ACPI_NONSTRING + - ACPICA: Fix largest possible resource descriptor index + - riscv, bpf: Sign extend struct ops return values properly + - nvme-auth: update bi_directional flag + - nvmet-fc: move lsop put work to nvmet_fc_ls_req_op + - nvmet-fcloop: call done callback even when remote port is gone + - nvme-tcp: send only permitted commands for secure concat + - i3c: master: svc: Use manual response for IBI events + - i3c: master: svc: Recycle unused IBI slot + - block: update validation of atomic writes boundary for stacked devices + - block: fix stacking of atomic writes when atomics are not supported + - selftests: watchdog: skip ping loop if WDIOF_KEEPALIVEPING not supported + - selftests/kselftest_harness: Add harness-selftest.expected to TEST_FILES + - blk-throttle: fix throtl_data leak during disk release + - bpf: Explicitly 
check accesses to bpf_sock_addr + - mmc: select REGMAP_MMIO with MMC_LOONGSON2 + - selftests/futex: Fix futex_wait() for 32bit ARM + - selftest/futex: Make the error check more precise for futex_numa_mpol + - selftest/futex: Compile also with libnuma < 2.0.16 + - bpf: dont report verifier bug for missing bpf_scc_visit on speculative + path + - bpf, arm64: Call bpf_jit_binary_pack_finalize() in bpf_jit_free() + - arm64: dts: apple: t600x: Add missing WiFi properties + - arm64: dts: apple: t600x: Add bluetooth device nodes + - arm64: dts: apple: Add ethernet0 alias for J375 template + - selftests: always install UAPI headers to the correct directory + - smp: Fix up and expand the smp_call_function_many() kerneldoc + - mfd: max77705: max77705_charger: move active discharge setting to mfd + parent + - power: supply: max77705_charger: refactoring: rename charger to chg + - power: supply: max77705_charger: use regfields for config registers + - power: supply: max77705_charger: rework interrupts + - tools/nolibc: make time_t robust if __kernel_old_time_t is missing in + host headers + - spi: fix return code when spi device has too many chipselects + - clocksource/drivers/timer-tegra186: Avoid 64-bit divide operation + - clocksource/drivers/tegra186: Avoid 64-bit division + - bpf: Mark kfuncs as __noclone + - once: fix race by moving DO_ONCE to separate section + - hwmon: (mlxreg-fan) Separate methods of fan setting coming from + different subsystems + - tools/nolibc: add stdbool.h to nolibc includes + - thermal/drivers/qcom: Make LMH select QCOM_SCM + - thermal/drivers/qcom/lmh: Add missing IRQ includes + - i2c: mediatek: fix potential incorrect use of I2C_MASTER_WRRD + - i2c: spacemit: ensure bus release check runs when wait_bus_idle() fails + - i2c: spacemit: remove stop function to avoid bus error + - i2c: spacemit: disable SDA glitch fix to avoid restart delay + - i2c: spacemit: check SDA instead of SCL after bus reset + - i2c: spacemit: ensure SDA is released after bus reset + - i2c: designware: Fix clock issue when PM is disabled + - i2c: designware: Add disabling clocks when probe fails + - libbpf: Fix error when st-prefix_ops and ops from differ btf + - bpf: Enforce expected_attach_type for tailcall compatibility + - i3c: fix big-endian FIFO transfers + - mfd: max77705: Setup the core driver as an interrupt controller + - drm/sched: Fix a race in DRM_GPU_SCHED_STAT_NO_HANG test + - drm/panel-edp: Add disable to 100ms for MNB601LS1-4 + - drm/display: bridge-connector: correct CEC bridge pointers in + drm_bridge_connector_init + - drm/panel-edp: Add 50ms disable delay for four panels + - drm/vmwgfx: fix missing assignment to ts + - drm/amd/display: Reduce Stack Usage by moving 'audio_output' into + 'stream_res' v4 + - drm/panel: novatek-nt35560: Fix invalid return value + - drm/amdgpu: fix link error for !PM_SLEEP + - drm/amdgpu: Fix jpeg v4.0.3 poison irq call trace on sriov guest + - drm/amdgpu: Fix vcn v4.0.3 poison irq call trace on sriov guest + - PCI: endpoint: pci-ep-msi: Fix NULL vs IS_ERR() check in + pci_epf_write_msi_msg() + - PCI: xgene-msi: Return negative -EINVAL in xgene_msi_handler_setup() + - drm/radeon/r600_cs: clean up of dead code in r600_cs + - f2fs: fix condition in __allow_reserved_blocks() + - f2fs: fix to avoid overflow while left shift operation + - f2fs: fix to zero data after EOF for compressed file correctly + - drm/bridge: it6505: select REGMAP_I2C + - wifi: rtw88: Lock rtwdev->mutex before setting the LED + - HID: steelseries: refactor probe() and remove() + - 
media: zoran: Remove zoran_fh structure + - phy: rockchip: naneng-combphy: Enable U3 OTG port for RK3568 + - drm/bridge: cdns-dsi: Fix the _atomic_check() + - usb: host: max3421-hcd: Fix error pointer dereference in probe cleanup + - usb: misc: qcom_eud: Access EUD_MODE_MANAGER2 through secure calls + - PCI/pwrctrl: Fix double cleanup on devm_add_action_or_reset() failure + - misc: pci_endpoint_test: Fix array underflow in + pci_endpoint_test_ioctl() + - serial: max310x: Add error checking in probe() + - drm/amd/display: Remove redundant semicolons + - drm/amd/display: Add NULL pointer checks in dc_stream cursor attribute + functions + - crypto: keembay - Add missing check after sg_nents_for_len() + - hwrng: nomadik - add ARM_AMBA dependency + - docs: iio: ad3552r: Fix malformed code-block directive + - fwctl/mlx5: Fix memory alloc/free in mlx5ctl_fw_rpc() + - scsi: pm80xx: Restore support for expanders + - scsi: pm80xx: Fix array-index-out-of-of-bounds on rmmod + - scsi: libsas: Add dev_parent_is_expander() helper + - scsi: pm80xx: Use dev_parent_is_expander() helper + - scsi: pm80xx: Add helper function to get the local phy id + - scsi: pm80xx: Fix pm8001_abort_task() for chip_8006 when using an + expander + - mptcp: Fix up subflow's memcg when CONFIG_SOCK_CGROUP_DATA=n. + - scsi: myrs: Fix dma_alloc_coherent() error check + - f2fs: fix to clear unusable_cap for checkpoint=enable + - f2fs: fix to avoid NULL pointer dereference in + f2fs_check_quota_consistency() + - f2fs: fix to allow removing qf_name + - Revert "UBUNTU: SAUCE: drm/dp: drm_edp_backlight_set_level: do not + always send 3-byte commands" + - drm/dp: drm_edp_backlight_set_level: do not always send 3-byte commands + - crypto: octeontx2 - Call strscpy() with correct size argument + - drm: re-allow no-op changes on non-primary planes in async flips + - media: rj54n1cb0c: Fix memleak in rj54n1_probe() + - media: staging/ipu7: convert to use pci_alloc_irq_vectors() API + - media: staging/ipu7: Don't set name for IPU7 PCI device + - media: staging/ipu7: cleanup the MMU correctly in IPU7 driver release + - media: i2c: vd55g1: Fix duster register address + - drm/panel: Allow powering on panel follower after panel is enabled + - HID: i2c-hid: Make elan touch controllers power on after panel is + enabled + - RDMA/mlx5: Better estimate max_qp_wr to reflect WQE count + - RDMA/mlx5: Fix vport loopback forcing for MPV device + - wifi: rtw88: Use led->brightness_set_blocking for PCI too + - net: phy: introduce phy_id_compare_vendor() PHY ID helper + - net: phy: as21xxx: better handle PHY HW reset on soft-reboot + - PCI: rcar-host: Pass proper IRQ domain to generic_handle_domain_irq() + - fuse: remove unneeded offset assignment when filling write pages + - PCI: qcom: Restrict port parsing only to PCIe bridge child nodes + - cdx: don't select CONFIG_GENERIC_MSI_IRQ + - PCI/ACPI: Fix pci_acpi_preserve_config() memory leak + - HID: i2c-hid: Fix test in i2c_hid_core_register_panel_follower() + - ALSA: lx_core: use int type to store negative error codes + - media: st-delta: avoid excessive stack usage + - drm/amdgpu/vcn: Add regdump helper functions + - drm/amdgpu/vcn: Hold pg_lock before vcn power off + - drm/amdgpu: Check vcn state before profile switch + - accel/amdxdna: Use int instead of u32 to store error codes + - efi: Explain OVMF acronym in OVMF_DEBUG_LOG help text + - net: dst: introduce dst->dev_rcu + - ipv6: mcast: Add ip6_mc_find_idev() helper + - ipv6: start using dst_dev_rcu() + - ipv6: use RCU in ip6_xmit() + - ipv6: use RCU in 
ip6_output() + - net: use dst_dev_rcu() in sk_setup_caps() + - tcp_metrics: use dst_dev_net_rcu() + - ipv4: start using dst_dev_rcu() + - crypto: hisilicon/zip - remove unnecessary validation for high- + performance mode configurations + - crypto: hisilicon - re-enable address prefetch after device resuming + - crypto: hisilicon - check the sva module status while enabling or + disabling address prefetch + - crypto: hisilicon/qm - check whether the input function and PF are on + the same device + - crypto: hisilicon/qm - request reserved interrupt for virtual function + - inet: ping: check sock_net() in ping_get_port() and ping_lookup() + - dmaengine: Fix dma_async_tx_descriptor->tx_submit documentation + - coresight: trbe: Add ISB after TRBLIMITR write + - coresight: Fix missing include for FIELD_GET + - coresight: Only register perf symlink for sinks with alloc_buffer + - drm/amdgpu: Power up UVD 3 for FW validation (v2) + - drm/amd/pm: Disable ULV even if unsupported (v3) + - drm/amd/pm: Fix si_upload_smc_data (v3) + - drm/amd/pm: Adjust si_upload_smc_data register programming (v3) + - drm/amd/pm: Treat zero vblank time as too short in si_dpm (v3) + - drm/amd/pm: Disable MCLK switching with non-DC at 120 Hz+ (v2) + - drm/amd/pm: Disable SCLK switching on Oland with high pixel clocks (v3) + - wifi: mac80211: Make CONNECTION_MONITOR optional for MLO sta + - wifi: mwifiex: send world regulatory domain to driver + - wifi: brcmfmac: fix 43752 SDIO FWVID incorrectly labelled as Cypress + (CYW) + - drm/msm: Do not validate SSPP when it is not ready + - PCI: tegra: Fix devm_kcalloc() argument order for port->phys allocation + - wifi: mac80211: consider links for validating SCAN_FLAG_AP in scan + request during MLO + - PCI: qcom: Add equalization settings for 8.0 GT/s and 32.0 GT/s + - tcp: fix __tcp_close() to only send RST when required + - fanotify: Validate the return value of mnt_ns_from_dentry() before + dereferencing + - drm/amdkfd: Fix error code sign for EINVAL in svm_ioctl() + - usb: phy: twl6030: Fix incorrect type for ret + - usb: gadget: configfs: Correctly set use_os_string at bind + - tty: n_gsm: Don't block input queue by waiting MSC + - misc: genwqe: Fix incorrect cmd field being reported in error + - pps: fix warning in pps_register_cdev when register device fail + - drm/msm: Fix obj leak in VM_BIND error path + - drm/msm: Fix missing VM_BIND offset/range validation + - wifi: iwlwifi: Remove redundant header files + - drm/msm/mdp4: stop supporting no-IOMMU configuration + - drm/msm: stop supporting no-IOMMU configuration + - idpf: fix Rx descriptor ready check barrier in splitq + - ASoC: Intel: bytcht_es8316: Fix invalid quirk input mapping + - ASoC: Intel: bytcr_rt5640: Fix invalid quirk input mapping + - ASoC: Intel: bytcr_rt5651: Fix invalid quirk input mapping + - ipv6: snmp: do not use SNMP_MIB_SENTINEL anymore + - ipv6: snmp: do not track per idev ICMP6_MIB_RATELIMITHOST + - drm/msm: Fix bootup splat with separate_gpu_drm modparam + - drm/msm/dpu: fix incorrect type for ret + - wifi: mac80211: fix reporting of all valid links in sta_set_sinfo() + - fs: ntfs3: Fix integer overflow in run_unpack() + - fs/ntfs3: reject index allocation if $BITMAP is empty but blocks exist + - iio: consumers: Fix handling of negative channel scale in + iio_convert_raw_to_processed() + - iio: consumers: Fix offset handling in iio_convert_raw_to_processed() + - mm/slub: Fix cmp_loc_by_count() to return 0 when counts are equal + - tools: ynl: fix undefined variable name + - RDMA/mlx5: Fix page 
size bitmap calculation for KSM mode + - netfilter: ipset: Remove unused htable_bits in macro ahash_region + - ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable + - HID: steelseries: Fix STEELSERIES_SRWS1 handling in steelseries_remove() + - watchdog: intel_oc_wdt: Do not try to write into const memory + - watchdog: mpc8xxx_wdt: Reload the watchdog timer when enabling the + watchdog + - PCI: endpoint: pci-epf-test: Fix doorbell test support + - drivers/base/node: handle error properly in register_one_node() + - RDMA/cm: Rate limit destroy CM ID timeout error message + - wifi: mt76: mt7996: Fix mt7996_mcu_sta_ba wcid configuration + - wifi: mt76: mt7996: Fix mt7996_mcu_bss_mld_tlv routine + - wifi: mt76: fix potential memory leak in mt76_wmac_probe() + - wifi: mt76: mt7996: Use proper link_id in link_sta_rc_update callback + - wifi: mt76: mt7996: Check phy before init msta_link in + mt7996_mac_sta_add_links() + - wifi: mt76: mt7996: Fix tx-queues initialization for second phy on + mt7996 + - wifi: mt76: mt7996: Fix RX packets configuration for primary WED device + - wifi: mt76: mt7996: Convert mt7996_wed_rro_addr to LE + - wifi: mt76: mt7915: fix mt7981 pre-calibration + - wifi: mt76: mt7996: remove redundant per-phy mac80211 calls during + restart + - ASoC: Intel: hda-sdw-bpt: set persistent_buffer false + - srcu/tiny: Remove preempt_disable/enable() in srcu_gp_start_if_needed() + - drm/amdgpu: Fix allocating extra dwords for rings (v2) + - f2fs: fix to update map->m_next_extent correctly in f2fs_map_blocks() + - f2fs: fix to truncate first page in error path of f2fs_truncate() + - f2fs: fix to avoid migrating empty section + - f2fs: fix to mitigate overhead of f2fs_zero_post_eof_page() + - RISC-V: KVM: Write hgatp register with valid mode bits + - ALSA: pcm: Disable bottom softirqs as part of spin_lock_irq() on + PREEMPT_RT + - ACPI: NFIT: Fix incorrect ndr_desc being reportedin dev_err message + - scsi: qla2xxx: edif: Fix incorrect sign of error code + - scsi: qla2xxx: Fix incorrect sign of error code in START_SP_W_RETRIES() + - scsi: qla2xxx: Fix incorrect sign of error code in qla_nvme_xmt_ls_rsp() + - HID: hidraw: tighten ioctl command parsing + - f2fs: fix zero-sized extent for precache extents + - smc: Fix use-after-free in __pnet_find_base_ndev(). + - smc: Use __sk_dst_get() and dst_dev_rcu() in in smc_clc_prfx_set(). + - smc: Use __sk_dst_get() and dst_dev_rcu() in smc_clc_prfx_match(). + - smc: Use __sk_dst_get() and dst_dev_rcu() in smc_vlan_by_tcpsk(). + - tls: Use __sk_dst_get() and dst_dev_rcu() in get_netdev_for_sock(). + - mptcp: Call dst_release() in mptcp_active_enable(). + - mptcp: Use __sk_dst_get() and dst_dev_rcu() in mptcp_active_enable(). 
+ - Revert "usb: xhci: Avoid Stop Endpoint retry loop if the endpoint seems + Running" + - RDMA/core: Resolve MAC of next-hop device without ARP support + - IB/sa: Fix sa_local_svc_timeout_ms read race + - Documentation: trace: historgram-design: Separate sched_waking histogram + section heading and the following diagram + - ASoC: SOF: ipc4-pcm: Fix incorrect comparison with number of tdm_slots + - wifi: ath12k: initialize eirp_power before use + - wifi: ath12k: fix overflow warning on num_pwr_levels + - wifi: ath12k: fix signal in radiotap for WCN7850 + - wifi: ath12k: fix HAL_PHYRX_COMMON_USER_INFO handling in monitor mode + - wifi: ath12k: fix the fetching of combined rssi + - wifi: ath12k: Add fallback for invalid channel number in PHY metadata + - wifi: ath12k: fix wrong logging ID used for CE + - wifi: ath10k: avoid unnecessary wait for service ready message + - iommu/vt-d: debugfs: Fix legacy mode page table dump logic + - wifi: mac80211: fix Rx packet handling when pubsta information is not + available + - ASoC: Intel: sof_sdw: Prevent jump to NULL add_sidecar callback + - sparc: fix accurate exception reporting in copy_{from_to}_user for + UltraSPARC + - sparc: fix accurate exception reporting in copy_{from_to}_user for + UltraSPARC III + - sparc: fix accurate exception reporting in copy_{from_to}_user for + Niagara + - sparc: fix accurate exception reporting in copy_to_user for Niagara 4 + - sparc: fix accurate exception reporting in copy_{from,to}_user for M7 + - vfio/pds: replace bitmap_free with vfree + - crypto: comp - Use same definition of context alloc and free ops + - crypto: hisilicon/qm - set NULL to qm->debug.qm_diff_regs + - wifi: ath12k: Fix peer lookup in ath12k_dp_mon_rx_deliver_msdu() + - rpmsg: qcom_smd: Fix fallback to qcom,ipc parse + - remoteproc: qcom_q6v5_mss: support loading MBN file on msm8974 + - RDMA/rxe: Fix race in do_task() when draining + - selftests/mm: fix va_high_addr_switch.sh failure on x86_64 + - wifi: rtw89: fix leak in rtw89_core_send_nullfunc() + - wifi: rtw89: avoid circular locking dependency in ser_state_run() + - PCI: tegra194: Fix duplicate PLL disable in + pex_ep_event_pex_rst_assert() + - remoteproc: qcom: q6v5: Avoid disabling handover IRQ twice + - remoteproc: qcom: pas: Shutdown lite ADSP DTB on X1E + - wifi: ath12k: Refactor RX TID deletion handling into helper function + - wifi: ath12k: Fix flush cache failure during RX queue update + - wifi: cfg80211: fix width unit in cfg80211_radio_chandef_valid() + - dm vdo: return error on corrupted metadata in start_restoring_volume + functions + - coresight: fix indentation error in cscfg_remove_owned_csdev_configs() + - coresight-etm4x: Conditionally access register TRCEXTINSELR + - coresight: tmc: Support atclk + - coresight: catu: Support atclk + - coresight: etm4x: Support atclk + - coresight: Appropriately disable programming clocks + - coresight: Appropriately disable trace bus clocks + - coresight: Avoid enable programming clock duplicately + - coresight: trbe: Return NULL pointer for allocation failures + - coresight: tpda: fix the logic to setup the element size + - coresight: Fix incorrect handling for return value of devm_kzalloc + - NFSv4.1: fix backchannel max_resp_sz verification check + - net: ethtool: tsconfig: set command must provide a reply + - netfilter: nfnetlink: reset nlh pointer during batch replay + - netfilter: nf_conntrack: do not skip entries in /proc/net/nf_conntrack + - scsi: ufs: core: Fix data race in CPU latency PM QoS request handling + - scsi: mpt3sas: 
Fix crash in transport port remove by using ioc_info() + - usb: vhci-hcd: Prevent suspending virtually attached devices + - PCI: rcar-gen4: Add missing 1ms delay after PWR reset assertion + - PCI: rcar-gen4: Assure reset occurs before DBI access + - PCI: rcar-gen4: Fix inverted break condition in PHY initialization + - ASoC: qcom: sc8280xp: use sa8775p/ subdir for QCS9100 / QCS9075 + - iommu/vt-d: Disallow dirty tracking if incoherent page walk + - iommu/selftest: prevent use of uninitialized variable + - RDMA/siw: Always report immediate post SQ errors + - net: enetc: Fix probing error message typo for the ENETCv4 PF driver + - net: usb: Remove disruptive netif_wake_queue in rtl8150_set_multicast + - ptp: Add a upper bound on max_vclocks + - vhost: vringh: Fix copy_to_iter return value check + - net: macb: remove illusion about TBQPH/RBQPH being per-queue + - net: macb: move ring size computation to functions + - net: macb: single dma_alloc_coherent() for DMA descriptors + - Bluetooth: btintel_pcie: Refactor Device Coredump + - Bluetooth: MGMT: Fix not exposing debug UUID on + MGMT_OP_READ_EXP_FEATURES_INFO + - Bluetooth: ISO: Fix possible UAF on iso_conn_free + - Bluetooth: ISO: free rx_skb if not consumed + - Bluetooth: ISO: don't leak skb in ISO_CONT RX + - Bluetooth: hci_sync: Fix using random address for BIG/PA advertisements + - KEYS: X.509: Fix Basic Constraints CA flag parsing + - hwrng: ks-sa - fix division by zero in ks_sa_rng_init + - cramfs: fix incorrect physical page address calculation + - ocfs2: fix double free in user_cluster_connect() + - drivers/base/node: fix double free in register_one_node() + - f2fs: fix UAF issue in f2fs_merge_page_bio() + - mtd: rawnand: atmel: Fix error handling path in + atmel_nand_controller_add_nands + - PCI: j721e: Fix incorrect error message in probe() + - idpf: fix mismatched free function for dma_alloc_coherent + - tcp: use skb->len instead of skb->truesize in tcp_can_ingest() + - nfp: fix RSS hash key size when RSS is not supported + - net: ena: return 0 in ena_get_rxfh_key_size() when RSS hash key is not + configurable + - net: dlink: handle copy_thresh allocation failure + - net/mlx5: Stop polling for command response if interface goes down + - net/mlx5: pagealloc: Fix reclaim race during command interface teardown + - net/mlx5: fw reset, add reset timeout work + - smb: client: fix crypto buffers in non-linear memory + - bonding: fix xfrm offload feature setup on active-backup mode + - net: enetc: initialize SW PIR and CIR based HW PIR and CIR values + - iommufd: Register iommufd mock devices with fwspec + - Revert "net/mlx5e: Update and set Xon/Xoff upon MTU set" + - NFSD: filecache: add STATX_DIOALIGN and STATX_DIO_READ_ALIGN support + - nfs/localio: avoid issuing misaligned IO using O_DIRECT + - octeontx2-vf: fix bitmap leak + - octeontx2-pf: fix bitmap leak + - vhost: vringh: Modify the return value check + - selftests/bpf: Fix typos and grammar in test sources + - selftests/bpf: move get_ksyms and get_addrs to trace_helpers.c + - selftests/bpf: Fix realloc size in bpf_get_addrs + - bpf: Skip scalar adjustment for BPF_NEG if dst is a pointer + - bpf: Reject negative offsets for ALU ops + - tpm: Disable TPM2_TCG_HMAC by default + - ALSA: hda/hdmi: Add pin fix for HP ProDesk model + - ALSA: hda/realtek: Add quirk for HP Spectre 14t-ea100 + - Squashfs: fix uninit-value in squashfs_get_parent + - uio_hv_generic: Let userspace take care of interrupt mask + - hisi_acc_vfio_pci: Fix reference leak in hisi_acc_vfio_debug_init + - 
io_uring/waitid: always prune wait queue entry in io_waitid_wait() + - io_uring/zcrx: fix overshooting recv limit + - ASoC: wcd934x: fix error handling in wcd934x_codec_parse_data() + - ASoC: SOF: ipc3-topology: Fix multi-core and static pipelines tear down + - ASoC: codecs: wcd937x: set the comp soundwire port correctly + - ASoC: codecs: wcd937x: make stub functions inline + - ASoC: SOF: ipc4-pcm: fix delay calculation when DSP resamples + - ASoC: SOF: ipc4-pcm: fix start offset calculation for chain DMA + - fs: udf: fix OOB read in lengthAllocDescs handling + - net: nfc: nci: Add parameter validation for packet data + - mfd: rz-mtu3: Fix MTU5 NFCR register offset + - mfd: intel_soc_pmic_chtdc_ti: Set use_single_read regmap_config flag + - mfd: vexpress-sysreg: Check the return value of devm_gpiochip_add_data() + - tracing: Fix lock imbalance in s_start() memory allocation failure path + - tracing: Fix race condition in kprobe initialization causing NULL + pointer dereference + - tracing: Fix wakeup tracers on failure of acquiring calltime + - tracing: Fix irqoff tracers on failure of acquiring calltime + - tracing: Have trace_marker use per-cpu data to read user space + - tracing: Fix tracing_mark_raw_write() to use buf and not ubuf + - tracing: Stop fortify-string from warning in tracing_mark_raw_write() + - dm: fix queue start/stop imbalance under suspend/load/resume races + - dm: fix NULL pointer dereference in __dm_suspend() + - LoongArch: Automatically disable kaslr if boot from kexec_file + - pwm: loongson: Fix LOONGSON_PWM_FREQ_DEFAULT + - LoongArch: BPF: Sign-extend struct ops return values properly + - LoongArch: BPF: No support of struct argument in trampoline programs + - LoongArch: BPF: Don't align trampoline size + - LoongArch: BPF: Make trampoline size stable + - LoongArch: BPF: Make error handling robust in + arch_prepare_bpf_trampoline() + - LoongArch: BPF: Remove duplicated bpf_flush_icache() + - LoongArch: BPF: No text_poke() for kernel text + - LoongArch: BPF: Remove duplicated flags check + - LoongArch: BPF: Fix uninitialized symbol 'retval_off' + - mm/ksm: fix flag-dropping behavior in ksm_madvise + - ksmbd: Fix race condition in RPC handle list access + - ksmbd: fix error code overwriting in smb2_get_info_filesystem() + - ksmbd: add max ip connections parameter + - ext4: fix potential null deref in ext4_mb_init() + - ext4: fix checks for orphan inodes + - KVM: SVM: Skip fastpath emulation on VM-Exit if next RIP isn't valid + - fbdev: simplefb: Fix use after free in simplefb_detach_genpds() + - mm: hugetlb: avoid soft lockup when mprotect to large memory area + - selftests/mm: skip soft-dirty tests when CONFIG_MEM_SOFT_DIRTY is + disabled + - nvdimm: ndtest: Return -ENOMEM if devm_kcalloc() fails in ndtest_probe() + - misc: fastrpc: Save actual DMA size in fastrpc_map structure + - misc: fastrpc: Fix fastrpc_map_lookup operation + - misc: fastrpc: fix possible map leak in fastrpc_put_args + - misc: fastrpc: Skip reference for DMA handles + - Input: atmel_mxt_ts - allow reset GPIO to sleep + - Input: uinput - zero-initialize uinput_ff_upload_compat to avoid info + leak + - sunrpc: fix null pointer dereference on zero-length checksum + - PCI/AER: Avoid NULL pointer dereference in aer_ratelimit() + - remoteproc: pru: Fix potential NULL pointer dereference in + pru_rproc_set_ctable() + - PCI: endpoint: pci-epf-test: Add NULL check for DMA channels before + release + - thunderbolt: Fix use-after-free in tb_dp_dprx_work + - tee: fix register_shm_helper() + - pinctrl: check 
the return value of pinmux_ops::get_function_name() + - bus: fsl-mc: Check return value of platform_get_resource() + - net/9p: Fix buffer overflow in USB transport layer + - net: usb: asix: hold PM usage ref to avoid PM/MDIO + RTNL deadlock + - usb: typec: tipd: Clear interrupts first + - arm64: dts: qcom: qcm2290: Disable USB SS bus instances in park mode + - usb: cdns3: cdnsp-pci: remove redundant pci_disable_device() call + - scsi: ufs: core: Fix PM QoS mutex initialization + - drm/amdgpu/vcn: Fix double-free of vcn dump buffer + - Linux 6.17.3 + * CVE-2025-40019 + - crypto: essiv - Check ssize for decryption and in-place encryption + * CVE-2025-40214 + - af_unix: Initialise scc_index in unix_add_edge(). + * Miscellaneous Ubuntu changes + - [SAUCE] Fix selftest/net/rtnetlink.sh for Big Endian + + -- Abdur Rahman Fri, 16 Jan 2026 15:25:40 -0500 + +linux-nvidia-6.17 (6.17.0-1006.6) noble; urgency=medium + + * noble/linux-nvidia-6.17: 6.17.0-1006.6 -proposed tracker (LP: #2136206) + + * Packaging resync (LP: #1786013) + - [Packaging] update Ubuntu.md + - [Packaging] debian.nvidia-6.17/dkms-versions -- update from kernel- + versions (adhoc/d2025.12.15) + - [Packaging] update variants + + * Seeing a lot of these traces xhci_endpoint_init rsv 0x801 (LP: #2134851) + - Revert "NVIDIA: SAUCE: Fixes the kernel boot issues due to xhci mem + errors" + + * r8127: fix for LTS test panic (LP: #2134991) + - NVIDIA: SAUCE: r8127: Remove registers2 proc entry + + * Update GDS/NVMe SAUCE for v6.17 (LP: #2134960) + - NVIDIA: SAUCE: Patch NVMe/NVMeoF driver to support GDS on Linux 6.17 + Kernel + + * Aquantia: seeing arm-smmu-v3 at shutdown or module removal (LP: #2133755) + - net: aquantia: Add missing descriptor cache invalidation on ATL2 + + * Backport gpio: tegra186: Add support for Tegra410 (LP: #2131269) + - gpio: tegra186: Use generic macro for port definitions + - gpio: tegra186: Add support for Tegra410 + + * Backport perf/arm_cspmu: Preparatory patches for NVIDIA T410 PMU + (LP: #2131267) + - perf/arm_cspmu: Add callback to reset filter config + - perf/arm_cspmu: Add pmpidr support + - perf/arm_cspmu: nvidia: Add revision id matching + - perf/arm_cspmu: nvidia: Add pmevfiltr2 support + + * NULL pointer dereference during vEGM Libvirt VM lifecycle (LP: #2131582) + - NVIDIA: SAUCE: vfio/nvgrace-egm: Prevent double-unregister of + pfn_address_space + + * Add two more Spark iGPU IDs for the existing iommu quirk (LP: #2132033) + - NVIDIA: SAUCE: iommu/arm-smmu-v3: Add two more DGX Spark iGPU IDs for + existing iommu quirk + + * Pull CPPC mailing list patches for Spark (LP: #2131705) + - NVIDIA: SAUCE: cpufreq: CPPC: Add generic helpers for sysfs show/store + - NVIDIA: SAUCE: ACPI: CPPC: Add cppc_get_perf() API to read performance + controls + - NVIDIA: SAUCE: ACPI: CPPC: extend APIs to support auto_sel and epp + - NVIDIA: SAUCE: ACPI: CPPC: add APIs and sysfs interface for min/max_perf + - NVIDIA: SAUCE: ACPI: CPPC: add APIs and sysfs interface for perf_limited + register + - NVIDIA: SAUCE: cpufreq: CPPC: Add sysfs for min/max_perf and + perf_limited + - NVIDIA: SAUCE: cpufreq: CPPC: update policy min/max when toggling + auto_select + - NVIDIA: SAUCE: cpufreq: CPPC: add autonomous mode boot parameter support + + [ Ubuntu: 6.17.0-8.8 ] + + * questing/linux: 6.17.0-8.8 -proposed tracker (LP: #2131554) + * crash when reading from /sys/kernel/tracing/rv/enabled_monitors + (LP: #2131136) + - rv: Fully convert enabled_monitors to use list_head as iterator + * i40e driver is triggering VF resets on every link 
state change + (LP: #2130552) + - i40e: avoid redundant VF link state updates + * kernel crash on bootup for some arm64 machines (LP: #2129770) + - KVM: arm64: Guard PMSCR_EL1 initialization with SPE presence check + * CVE-2025-40018 + - ipvs: Defer ip_vs_ftp unregister during netns cleanup + + -- Jacob Martin Wed, 17 Dec 2025 09:59:28 -0600 + +linux-nvidia-6.17 (6.17.0-1004.4) noble; urgency=medium + + * noble/linux-nvidia-6.17: 6.17.0-1003.3 -proposed tracker (LP: #2131581) + + * kexec reports it cannot determine the file type of arm64 kernel images + (LP: #2131154) + - Revert "UBUNTU: [Packaging] Install compressed vmlinuz.efi on arm64" + + * Race condition in perf build causes build failure due to missing + unistd_64.h header on arm64 (LP: #2131702) + - perf tools: Fix arm64 libjvmti build by generating unistd_64.h + + * Packaging resync (LP: #1786013) + - [Packaging] debian.nvidia-6.17/dkms-versions -- update from kernel- + versions (main/d2025.11.04) + + * Binaries perf and bpftool missing from linux-tools-6.14.0-*nvidia{,-64k} + packages (LP: #2127953) + - [Packaging] Add do_tools_noble_hwe to include perf and bpftool in + SRCPKGNAME-tools-$(abi_release) + - [Packaging] nvidia-6.17: enable do_tools_noble_hwe + + * KVM initialization hitting exception at boot time with kernel 6.17 + (LP: #2130289) + - KVM: arm64: Guard PMSCR_EL1 initialization with SPE presence check + + * r8127: fix kernel panic when dump all registers (LP: #2130445) + - NVIDIA: SAUCE: r8127: fix a kernel panic when dump all registers + - NVIDIA: SAUCE: r8127: add support for RTL8127 cable diagnostic test + + * QSPI Transfer failed with timeout: 0 (LP: #2126589) + - spi: tegra210-quad: Fix timeout handling + - spi: tegra210-quad: Refactor error handling into helper functions + - spi: tegra210-quad: Check hardware status on timeout + + * Backport arm64: cpufeature: Add Olympus MIDR to BBML2 allow list + (LP: #2131047) + - arm64: cpufeature: Add Olympus MIDR to BBML2 allow list + + * Set CONFIG_IOMMU_DEFAULT_PASSTHROUGH as default for Nvidia CPUs + (LP: #2129776) + - NVIDIA: SAUCE: iommu/arm-smmu-v3: Set DGX Spark iGPU default domain type + to DMA + - [Config] nvidia-6.17: Update annotations to set + CONFIG_IOMMU_DEFAULT_PASSTHROUGH + + * mt7925: Introduce CSA support in non-MLO mode (LP: #2129209) + - NVIDIA: SAUCE: wifi: mt76: mt7925: introduce CSA support in non-MLO mode + + * IOMMU: Support contiguous bit in translation tables (LP: #2112600) + - NVIDIA: SAUCE: iommu/io-pgtable-arm: backport contiguous bit support + + [ Ubuntu: 6.17.0-7.7 ] + + * questing/linux: 6.17.0-7.7 -proposed tracker (LP: #2128695) + * Fix incorrect bug number for CONFIG_KERNEL_ZSTD (LP: #2127676) + - [Config] Fix bug note for CONFIG_KERNEL_ZSTD + * support Panter Lake CPU performance preferences (LP: #2127187) + - thermal: intel: int340x: Add support for power slider + - thermal: intel: int340x: Enable power slider interface for Panther Lake + - thermal: intel: int340x: Add module parameter for balanced Slider + - thermal: intel: int340x: Add module parameter to change slider offset + - thermal: intel: int340x: Power Slider: Validate slider_balance range + * [SRU][Q/P/N:hwe-6.14] mt7925: Add MBSS support (LP: #2119479) + - wifi: mt76: mt7925: add MBSSID support + * Plucky preinstalled server fails to boot on rb3gen2 (LP: #2106681) // + Questing preinstalled server fails to boot on sa8775p boards + (LP: #2121347) + - [Config] move more qcom interconnect/pinctrl/gcc options to builtin + * Packaging resync (LP: #1786013) + - [Packaging] update 
Ubuntu.md + * r8169 can not wake on LAN via SFP moudule (LP: #2123901) + - r8169: set EEE speed down ratio to 1 + * System hangs when running the memory stress test (LP: #2103680) + - mm: page_alloc: avoid kswapd thrashing due to NUMA restrictions + * Questing update: v6.17.2 upstream stable release (LP: #2128209) + - drm/amdgpu: Enable MES lr_compute_wa by default + - USB: serial: option: add SIMCom 8230C compositions + - Bluetooth: btusb: Add USB ID 2001:332a for D-Link AX9U rev. A1 + - wifi: rtlwifi: rtl8192cu: Don't claim USB ID 07b8:8188 + - wifi: rtl8xxxu: Don't claim USB ID 07b8:8188 + - rust: drm: fix `srctree/` links + - rust: block: fix `srctree/` links + - rust: pci: fix incorrect platform reference in PCI driver probe doc + comment + - rust: pci: fix incorrect platform reference in PCI driver unbind doc + comment + - serial: qcom-geni: Fix blocked task + - nvmem: layouts: fix automatic module loading + - drivers/misc/amd-sbi/Kconfig: select REGMAP_I2C + - binder: fix double-free in dbitmap + - serial: stm32: allow selecting console when the driver is module + - [Config] stm32: do not select console when driver is module + - staging: axis-fifo: fix maximum TX packet length check + - staging: axis-fifo: fix TX handling on copy_from_user() failure + - staging: axis-fifo: flush RX FIFO on read errors + - driver core: faux: Set power.no_pm for faux devices + - driver core/PM: Set power.no_callbacks along with power.no_pm + - Revert "crypto: testmgr - desupport SHA-1 for FIPS 140" + - crypto: zstd - Fix compression bug caused by truncation + - crypto: rng - Ensure set_ent is always present + - net/9p: fix double req put in p9_fd_cancelled + - KVM: x86: Don't (re)check L1 intercepts when completing userspace I/O + - f2fs: fix to do sanity check on node footer for non inode dnode + - ring buffer: Propagate __rb_map_vma return value to caller + - Linux 6.17.2 + + -- Jacob Martin Mon, 17 Nov 2025 16:17:33 -0600 linux-nvidia-6.17 (6.17.0-1002.2) noble; urgency=medium diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations index f90afe88e3a07..0dcf99914905c 100644 --- a/debian.nvidia-6.17/config/annotations +++ b/debian.nvidia-6.17/config/annotations @@ -96,6 +96,9 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE note<'LP: #2028576: Perf governo CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL policy<{'amd64': 'n', 'arm64': 'n'}> CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL note<'LP: #2028576: Perf governor required for NVIDIA workloads'> +CONFIG_CXL_MEM_RAW_COMMANDS policy<{'amd64': 'n', 'arm64': 'y'}> +CONFIG_CXL_MEM_RAW_COMMANDS note<'Enable CXL raw commands for memory devices'> + CONFIG_DRM_NOUVEAU policy<{'amd64': 'n', 'arm64': 'n'}> CONFIG_DRM_NOUVEAU note<'Disable nouveau for NVIDIA kernels'> @@ -108,6 +111,9 @@ CONFIG_DRM_NOUVEAU_GSP_DEFAULT note<'Disable nouveau for NVIDIA CONFIG_DRM_NOUVEAU_SVM policy<{'amd64': '-', 'arm64': '-'}> CONFIG_DRM_NOUVEAU_SVM note<'Disable nouveau for NVIDIA kernels'> +CONFIG_DW_I3C_MASTER policy<{'amd64': 'n', 'arm64': 'm'}> +CONFIG_DW_I3C_MASTER note<'Enable DesignWare I3C master controller for Tegra410'> + CONFIG_EFI_CAPSULE_LOADER policy<{'amd64': 'm', 'arm64': 'y'}> CONFIG_EFI_CAPSULE_LOADER note<'LP: #2067111'> @@ -117,6 +123,9 @@ CONFIG_ETM4X_IMPDEF_FEATURE note<'Required for Grace enablem CONFIG_GPIO_AAEON policy<{'amd64': '-'}> CONFIG_GPIO_AAEON note<'Disable all Ubuntu ODM drivers'> +CONFIG_I3C policy<{'amd64': 'n', 'arm64': 'm'}> +CONFIG_I3C note<'Enable I3C bus support for Tegra410 and SPD5118 temperature sensors'> + 
CONFIG_IOMMU_DEFAULT_DMA_LAZY policy<{'amd64': 'y', 'arm64': 'n'}> CONFIG_IOMMU_DEFAULT_DMA_LAZY note<'On Nvidia CPU passthrough mode is recommend so set passthrough mode as default for better performance'> @@ -141,6 +150,9 @@ CONFIG_MICROSOFT_MANA note<'LP: #2084598'> CONFIG_MTD policy<{'amd64': 'm', 'arm64': 'y'}> CONFIG_MTD note<'Essential for boot on ARM64'> +CONFIG_MTK_PCIE_HOTPLUG policy<{'arm64': 'm'}> +CONFIG_MTK_PCIE_HOTPLUG note<'CX7 PCIe hotplug driver for NVIDIA DGX Spark systems with GB10 SoC.'> + CONFIG_NOUVEAU_DEBUG policy<{'amd64': '-', 'arm64': '-'}> CONFIG_NOUVEAU_DEBUG note<'Disable nouveau for NVIDIA kernels'> @@ -177,6 +189,9 @@ CONFIG_SAMPLE_CORESIGHT_SYSCFG note<'Required for Grace enablem CONFIG_SENSORS_AAEON policy<{'amd64': '-'}> CONFIG_SENSORS_AAEON note<'Disable all Ubuntu ODM drivers'> +CONFIG_SENSORS_SPD5118 policy<{'amd64': 'n', 'arm64': 'm'}> +CONFIG_SENSORS_SPD5118 note<'Enable SPD5118 temperature sensor support for DDR5 memory modules'> + CONFIG_SND_HDA_ACPI policy<{'amd64': 'n', 'arm64': 'm'}> CONFIG_SND_HDA_ACPI note<'Add support for ACPI-enumerated HDA'> @@ -195,10 +210,18 @@ CONFIG_ULTRASOC_SMB note<'Required for Grace enablem # ---- Annotations without notes ---- +CONFIG_ACPI_MPAM policy<{'amd64': '-', 'arm64': 'y'}> +CONFIG_ARCH_HAS_CPU_RESCTRL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ARM64_MPAM policy<{'amd64': '-', 'arm64': 'y'}> +CONFIG_ARM64_MPAM_DRIVER policy<{'arm64': 'y'}> +CONFIG_ARM64_MPAM_DRIVER_DEBUG policy<{'amd64': '-', 'arm64': 'n'}> +CONFIG_ARM64_MPAM_RESCTRL_FS policy<{'arm64': 'y'}> +CONFIG_ARM_CPU_RESCTRL policy<{'amd64': '-', 'arm64': '-'}> CONFIG_ARM_FFA_TRANSPORT policy<{'arm64': 'y'}> CONFIG_ARM_SMMU_V3_IOMMUFD policy<{'arm64': 'y'}> CONFIG_AS_VERSION policy<{'amd64': '24200', 'arm64': '24200'}> CONFIG_AX88796B_RUST_PHY policy<{'amd64': '-', 'arm64': '-'}> +CONFIG_BATTERY_HUAWEI_GAOKUN policy<{'arm64': '-'}> CONFIG_BCH policy<{'amd64': 'm', 'arm64': 'y'}> CONFIG_BINDGEN_VERSION_TEXT policy<{'amd64': '-', 'arm64': '-'}> CONFIG_BLK_DEV_RUST_NULL policy<{'amd64': '-', 'arm64': '-'}> @@ -214,6 +237,7 @@ CONFIG_DRM_NOUVEAU_CH7006 policy<{'amd64': '-', 'arm64': ' CONFIG_DRM_NOUVEAU_SIL164 policy<{'amd64': '-', 'arm64': '-'}> CONFIG_DRM_NOVA policy<{'amd64': '-', 'arm64': '-'}> CONFIG_DRM_PANIC_SCREEN_QR_CODE policy<{'amd64': '-', 'arm64': '-'}> +CONFIG_EC_HUAWEI_GAOKUN policy<{'arm64': 'n'}> CONFIG_GCC_VERSION policy<{'amd64': '130300', 'arm64': '130300'}> CONFIG_HAVE_RUST policy<{'amd64': 'y', 'arm64': '-'}> CONFIG_IOMMUFD_VFIO_CONTAINER policy<{'arm64': 'y'}> @@ -223,8 +247,13 @@ CONFIG_NVGRACE_EGM policy<{'arm64': 'm'}> CONFIG_NVIDIA_FFA_EC policy<{'arm64': 'y'}> CONFIG_PAHOLE_VERSION policy<{'amd64': '125', 'arm64': '125'}> CONFIG_PINCTRL_MT8901 policy<{'arm64': 'y'}> +CONFIG_PROC_CPU_RESCTRL policy<{'amd64': 'y', 'arm64': 'y'}> CONFIG_R8127 policy<{'amd64': 'n', 'arm64': 'm'}> CONFIG_RELR policy<{'arm64': '-'}> +CONFIG_RESCTRL_FS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_RESCTRL_FS_PSEUDO_LOCK policy<{'amd64': 'y', 'arm64': '-'}> +CONFIG_RESCTRL_IOMMU policy<{'amd64': '-', 'arm64': 'y'}> +CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID policy<{'amd64': '-', 'arm64': 'y'}> CONFIG_RUSTC_HAS_COERCE_POINTEE policy<{'amd64': '-', 'arm64': '-'}> CONFIG_RUSTC_LLVM_VERSION policy<{'amd64': '180103', 'arm64': '180103'}> CONFIG_RUSTC_SUPPORTS_ARM64 policy<{'arm64': '-'}> @@ -238,5 +267,6 @@ CONFIG_RUST_PHYLIB_ABSTRACTIONS policy<{'amd64': '-', 'arm64': ' CONFIG_SAMPLES_RUST policy<{'amd64': '-', 'arm64': '-'}> CONFIG_TCG_ARM_CRB_FFA 
policy<{'arm64': 'y'}> CONFIG_TOOLS_SUPPORT_RELR policy<{'amd64': 'y', 'arm64': '-'}> +CONFIG_UCSI_HUAWEI_GAOKUN policy<{'arm64': '-'}> CONFIG_VFIO_CONTAINER policy<{'amd64': 'y', 'arm64': 'n'}> CONFIG_VFIO_IOMMU_TYPE1 policy<{'amd64': 'm', 'arm64': '-'}> diff --git a/debian.nvidia-6.17/control.stub.in b/debian.nvidia-6.17/control.stub.in index b10ee18491eba..808f7dca049c3 100644 --- a/debian.nvidia-6.17/control.stub.in +++ b/debian.nvidia-6.17/control.stub.in @@ -52,6 +52,8 @@ Build-Depends: uuid-dev , zstd , bpftool:native [amd64 arm64] , + nvidia-dkms-kernel [amd64 arm64] , + nvidia-kernel-source [amd64 arm64] , Build-Depends-Indep: asciidoc , bzip2 , diff --git a/debian.nvidia-6.17/dkms-versions b/debian.nvidia-6.17/dkms-versions index 546ff4b97ac94..131617ee2b513 100644 --- a/debian.nvidia-6.17/dkms-versions +++ b/debian.nvidia-6.17/dkms-versions @@ -1,3 +1,4 @@ -zfs-linux 2.3.4-1ubuntu2 modulename=zfs debpath=pool/universe/z/%package%/zfs-dkms_%version%_all.deb arch=amd64 arch=arm64 arch=ppc64el arch=s390x rprovides=spl-modules rprovides=spl-dkms rprovides=zfs-modules rprovides=zfs-dkms +zfs-linux 2.3.4-1ubuntu2 modulename=zfs debpath=pool/universe/z/%package%/zfs-dkms_%version%_all.deb arch=amd64 arch=arm64 arch=ppc64el arch=s390x arch=riscv64 rprovides=spl-modules rprovides=spl-dkms rprovides=zfs-modules rprovides=zfs-dkms v4l2loopback 0.15.0-0ubuntu2 modulename=v4l2loopback debpath=pool/universe/v/%package%/v4l2loopback-dkms_%version%_all.deb arch=amd64 rprovides=v4l2loopback-modules rprovides=v4l2loopback-dkms mstflint 4.26.0-1 modulename=mstflint_access debpath=pool/universe/m/%package%/mstflint-dkms_%version%_all.deb arch=amd64 arch=arm64 rprovides=mstflint-modules rprovides=mstflint-dkms +nvidia-fs 2.28.0-1 modulename=nvidia-fs debpath=pool/universe/n/%package%/nvidia-fs-dkms_%version%_amd64.deb arch=amd64 arch=arm64 rprovides=nvidia-fs-modules rprovides=nvidia-fs-dkms type=standalone diff --git a/debian.nvidia-6.17/reconstruct b/debian.nvidia-6.17/reconstruct index 2e620630217d9..8c46e786cef15 100644 --- a/debian.nvidia-6.17/reconstruct +++ b/debian.nvidia-6.17/reconstruct @@ -29,6 +29,9 @@ chmod +x 'debian/templates/image.preinst.in' chmod +x 'debian/templates/image.prerm.in' chmod +x 'debian/tests/rebuild' chmod +x 'debian/tests/ubuntu-regression-suite' +chmod -x 'drivers/edac/ecs.c' +chmod -x 'drivers/edac/mem_repair.c' +chmod -x 'drivers/edac/scrub.c' chmod +x 'drivers/net/ethernet/realtek/r8127/Makefile' chmod +x 'drivers/net/ethernet/realtek/r8127/r8127.h' chmod +x 'drivers/net/ethernet/realtek/r8127/r8127_dash.h' diff --git a/debian.nvidia-6.17/rules.d/arm64.mk b/debian.nvidia-6.17/rules.d/arm64.mk index 6a59fae676a5b..b1d68d2e7108e 100644 --- a/debian.nvidia-6.17/rules.d/arm64.mk +++ b/debian.nvidia-6.17/rules.d/arm64.mk @@ -1,8 +1,8 @@ build_arch = arm64 defconfig = defconfig flavours = nvidia nvidia-64k -build_image = vmlinuz.efi -kernel_file = arch/$(build_arch)/boot/vmlinuz.efi +build_image = Image.gz +kernel_file = arch/$(build_arch)/boot/Image.gz install_file = vmlinuz no_dumpfile = true uefi_signed = true diff --git a/debian.nvidia-6.17/tracking-bug b/debian.nvidia-6.17/tracking-bug index f1e509ef8d222..6400076c36ec6 100644 --- a/debian.nvidia-6.17/tracking-bug +++ b/debian.nvidia-6.17/tracking-bug @@ -1 +1 @@ -2131581 d2025.11.04-1 +2138765 d2025.12.18-2 diff --git a/debian.nvidia-6.17/variants b/debian.nvidia-6.17/variants index 3225f003f2f4f..7d12a65a2f915 100644 --- a/debian.nvidia-6.17/variants +++ b/debian.nvidia-6.17/variants @@ -1 +1,3 @@ -6.17 
+-hwe-24.04-edge +-hwe-24.04 diff --git a/drivers/Kconfig b/drivers/Kconfig index 4915a63866b01..3054b50a2f4cb 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/resctrl/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 9afe024f2d755..6c2af14c22b87 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -194,5 +194,6 @@ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ obj-$(CONFIG_DPLL) += dpll/ +obj-y += resctrl/ obj-$(CONFIG_S390) += s390/ diff --git a/drivers/acpi/acpica/rsserial.c b/drivers/acpi/acpica/rsserial.c index 279bfa27da94d..1119c64795a77 100644 --- a/drivers/acpi/acpica/rsserial.c +++ b/drivers/acpi/acpica/rsserial.c @@ -315,7 +315,7 @@ struct acpi_rsconvert_info acpi_rs_convert_csi2_serial_bus[14] = { * ******************************************************************************/ -struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[17] = { +struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[18] = { {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS, ACPI_RS_SIZE(struct acpi_resource_i2c_serialbus), ACPI_RSC_TABLE_SIZE(acpi_rs_convert_i2c_serial_bus)}, @@ -391,6 +391,10 @@ struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[17] = { AML_OFFSET(i2c_serial_bus.type_specific_flags), 0}, + {ACPI_RSC_MOVE8, ACPI_RS_OFFSET(data.i2c_serial_bus.lvr), + AML_OFFSET(i2c_serial_bus.type_specific_flags) + 1, + 0}, + {ACPI_RSC_MOVE32, ACPI_RS_OFFSET(data.i2c_serial_bus.connection_speed), AML_OFFSET(i2c_serial_bus.connection_speed), 1}, diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index b3ed6212244c1..f2fd79f22e7d8 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -21,3 +21,6 @@ config ACPI_AGDI config ACPI_APMT bool + +config ACPI_MPAM + bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 05ecde9eaabe9..9390b57cb5648 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ACPI_FFH) += ffh.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_IORT) += iort.o +obj-$(CONFIG_ACPI_MPAM) += mpam.o obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c new file mode 100644 index 0000000000000..845aedf61993d --- /dev/null +++ b/drivers/acpi/arm64/mpam.c @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ + +#define pr_fmt(fmt) "ACPI MPAM: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +/* + * Flags for acpi_table_mpam_msc.*_interrupt_flags. + * See 2.1.1 Interrupt Flags, Table 5, of DEN0065B_MPAM_ACPI_3.0-bet. + */ +#define ACPI_MPAM_MSC_IRQ_MODE BIT(0) +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK GENMASK(2, 1) +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK BIT(3) +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER 1 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) + +/* + * Encodings for the MSC node body interface type field. + * See 2.1 MPAM MSC node, Table 4 of DEN0065B_MPAM_ACPI_3.0-bet. 
+ */ +#define ACPI_MPAM_MSC_IFACE_MMIO 0x00 +#define ACPI_MPAM_MSC_IFACE_PCC 0x0a + +static bool _is_ppi_partition(u32 flags) +{ + u32 aff_type, is_ppi; + bool ret; + + is_ppi = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_VALID, flags); + if (!is_ppi) + return false; + + aff_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK, flags); + ret = (aff_type == ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER); + if (ret) + pr_err_once("Partitioned interrupts not supported\n"); + + return ret; +} + +static bool acpi_mpam_register_irq(struct platform_device *pdev, int intid, + u32 flags, int *irq) +{ + u32 int_type; + int sense; + + if (!intid) + return false; + + if (_is_ppi_partition(flags)) + return false; + + sense = FIELD_GET(ACPI_MPAM_MSC_IRQ_MODE, flags); + int_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_TYPE_MASK, flags); + if (int_type != ACPI_MPAM_MSC_IRQ_TYPE_WIRED) + return false; + + *irq = acpi_register_gsi(&pdev->dev, intid, sense, ACPI_ACTIVE_HIGH); + if (*irq <= 0) { + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", + intid); + return false; + } + + return true; +} + +static void acpi_mpam_parse_irqs(struct platform_device *pdev, + struct acpi_mpam_msc_node *tbl_msc, + struct resource *res, int *res_idx) +{ + u32 flags, intid; + int irq; + + intid = tbl_msc->overflow_interrupt; + flags = tbl_msc->overflow_interrupt_flags; + if (acpi_mpam_register_irq(pdev, intid, flags, &irq)) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "overflow"); + + intid = tbl_msc->error_interrupt; + flags = tbl_msc->error_interrupt_flags; + if (acpi_mpam_register_irq(pdev, intid, flags, &irq)) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "error"); +} + +#define UUID_MPAM_INTERCONNECT_TABLE "fe2bd645-033b-49e6-9479-2e0b8b21d1cd" + +struct acpi_mpam_interconnect_descriptor_table { + u8 type_uuid[16]; + u32 num_descriptors; +}; + +struct acpi_mpam_interconnect_descriptor { + u32 source_id; + u32 destination_id; + u8 link_type; + u8 reserved[3]; +}; + +static int acpi_mpam_parse_resource(struct acpi_mpam_msc_node *tbl_msc, + struct mpam_msc *msc, + struct acpi_mpam_resource_node *res) +{ + struct acpi_mpam_interconnect_descriptor_table *tbl_int_tbl; + struct acpi_mpam_interconnect_descriptor *tbl_int; + guid_t int_tbl_uuid, spec_uuid; + int level, nid; + u32 cache_id; + off_t offset; + + /* + * Class IDs are somewhat arbitrary, but need to be co-ordinated. + * 0-N are caches, + * 64, 65: Interconnect, but ideally these would appear between the + * classes the controls are adjacent to. + * 128: SMMU, + * 192-192+level: Memory Side Caches, nothing checks that N is a + * small number. + * 255: Memory Controllers + * + * ACPI devices would need a class id allocated based on the _HID. + * + * Classes that the mpam driver can't currently plumb into resctrl + * are registered as UNKNOWN. 
+	 */
+	switch (res->locator_type) {
+	case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE:
+		cache_id = res->locator.cache_locator.cache_reference;
+		level = find_acpi_cache_level_from_id(cache_id);
+		if (level <= 0 || level >= 64) {
+			pr_err_once("Bad level (%d) for cache with id %u\n", level, cache_id);
+			return -EINVAL;
+		}
+		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE,
+				       level, cache_id);
+	case ACPI_MPAM_LOCATION_TYPE_MEMORY:
+		nid = pxm_to_node(res->locator.memory_locator.proximity_domain);
+		if (nid == NUMA_NO_NODE) {
+			pr_debug("Bad proximity domain %lld, using node 0 instead\n",
+				 res->locator.memory_locator.proximity_domain);
+			nid = 0;
+		}
+		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY,
+				       255, nid);
+	case ACPI_MPAM_LOCATION_TYPE_SMMU:
+		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_UNKNOWN,
+				       128, res->locator.smmu_locator.smmu_interface);
+	case ACPI_MPAM_LOCATION_TYPE_MEMORY_CACHE:
+		cache_id = res->locator.mem_cache_locator.reference;
+		level = res->locator.mem_cache_locator.level;
+		if (192 + level >= 255) {
+			pr_err_once("Bad level (%u) for memory side cache with reference %u\n",
+				    level, cache_id);
+			return -EINVAL;
+		}
+
+		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE,
+				       192 + level, cache_id);
+
+	case ACPI_MPAM_LOCATION_TYPE_INTERCONNECT:
+		/* Find the descriptor table, and check it lands in the parent msc */
+		offset = res->locator.interconnect_ifc_locator.inter_connect_desc_tbl_off;
+		if (offset >= tbl_msc->length) {
+			pr_err_once("Bad offset (%lu) for interconnect descriptor on msc %u\n",
+				    offset, tbl_msc->identifier);
+			return -EINVAL;
+		}
+		tbl_int_tbl = ACPI_ADD_PTR(struct acpi_mpam_interconnect_descriptor_table,
+					   tbl_msc, offset);
+		guid_parse(UUID_MPAM_INTERCONNECT_TABLE, &spec_uuid);
+		import_guid(&int_tbl_uuid, tbl_int_tbl->type_uuid);
+		if (!guid_equal(&spec_uuid, &int_tbl_uuid)) {
+			pr_err_once("Bad UUID for interconnect descriptor on msc %u\n",
+				    tbl_msc->identifier);
+			return -EINVAL;
+		}
+
+		offset += sizeof(*tbl_int_tbl);
+		offset += tbl_int_tbl->num_descriptors * sizeof(*tbl_int);
+		if (offset >= tbl_msc->length) {
+			pr_err_once("Bad num_descriptors (%u) for interconnect descriptor on msc %u\n",
+				    tbl_int_tbl->num_descriptors, tbl_msc->identifier);
+			return -EINVAL;
+		}
+
+		tbl_int = ACPI_ADD_PTR(struct acpi_mpam_interconnect_descriptor,
+				       tbl_int_tbl, sizeof(*tbl_int_tbl));
+		cache_id = tbl_int->source_id;
+
+		/* Unknown link type? */
+		if (tbl_int->link_type != 0 && tbl_int->link_type != 1)
+			return 0;
+
+		return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_UNKNOWN,
+				       64 + tbl_int->link_type, cache_id);
+	default:
+		/* These get discovered later and are treated as unknown */
+		return 0;
+	}
+}
+
+int acpi_mpam_parse_resources(struct mpam_msc *msc,
+			      struct acpi_mpam_msc_node *tbl_msc)
+{
+	int i, err;
+	char *ptr, *table_end;
+	struct acpi_mpam_resource_node *resource;
+
+	ptr = (char *)(tbl_msc + 1);
+	table_end = ptr + tbl_msc->length;
+	for (i = 0; i < tbl_msc->num_resource_nodes; i++) {
+		u64 max_deps, remaining_table;
+
+		if (ptr + sizeof(*resource) > table_end)
+			return -EINVAL;
+
+		resource = (struct acpi_mpam_resource_node *)ptr;
+
+		remaining_table = table_end - ptr;
+		max_deps = remaining_table / sizeof(struct acpi_mpam_func_deps);
+		if (resource->num_functional_deps > max_deps) {
+			pr_debug("MSC has impossible number of functional dependencies\n");
+			return -EINVAL;
+		}
+
+		err = acpi_mpam_parse_resource(tbl_msc, msc, resource);
+		if (err)
+			return err;
+
+		ptr += sizeof(*resource);
+		ptr += resource->num_functional_deps * sizeof(struct acpi_mpam_func_deps);
+	}
+
+	return 0;
+}
+
+static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc,
+				     struct platform_device *pdev,
+				     u32 *acpi_id)
+{
+	char hid[sizeof(tbl_msc->hardware_id_linked_device) + 1] = { 0 };
+	bool acpi_id_valid = false;
+	struct acpi_device *buddy;
+	char uid[11];
+	int err;
+
+	memcpy(hid, &tbl_msc->hardware_id_linked_device,
+	       sizeof(tbl_msc->hardware_id_linked_device));
+
+	if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) {
+		*acpi_id = tbl_msc->instance_id_linked_device;
+		acpi_id_valid = true;
+	}
+
+	err = snprintf(uid, sizeof(uid), "%u",
+		       tbl_msc->instance_id_linked_device);
+	if (err >= sizeof(uid)) {
+		pr_debug("Failed to convert uid of device for power management.");
+		return acpi_id_valid;
+	}
+
+	buddy = acpi_dev_get_first_match_dev(hid, uid, -1);
+	if (buddy)
+		device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS);
+
+	return acpi_id_valid;
+}
+
+static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc,
+				 enum mpam_msc_iface *iface)
+{
+	switch (tbl_msc->interface_type) {
+	case ACPI_MPAM_MSC_IFACE_MMIO:
+		*iface = MPAM_IFACE_MMIO;
+		return 0;
+	case ACPI_MPAM_MSC_IFACE_PCC:
+		*iface = MPAM_IFACE_PCC;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static struct platform_device * __init acpi_mpam_parse_msc(struct acpi_mpam_msc_node *tbl_msc)
+{
+	struct platform_device *pdev __free(platform_device_put) = platform_device_alloc("mpam_msc", tbl_msc->identifier);
+	int next_res = 0, next_prop = 0, err;
+	/* pcc, nrdy, affinity and a sentinel */
+	struct property_entry props[4] = { 0 };
+	/* mmio, 2xirq, no sentinel.
*/ + struct resource res[3] = { 0 }; + struct acpi_device *companion; + enum mpam_msc_iface iface; + char uid[16]; + u32 acpi_id; + + if (!pdev) + return ERR_PTR(-ENOMEM); + + /* Some power management is described in the namespace: */ + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); + if (err > 0 && err < sizeof(uid)) { + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); + if (companion) + ACPI_COMPANION_SET(&pdev->dev, companion); + else + pr_debug("MSC.%u: missing namespace entry\n", + tbl_msc->identifier); + } + + if (decode_interface_type(tbl_msc, &iface)) { + pr_debug("MSC.%u: unknown interface type\n", tbl_msc->identifier); + return ERR_PTR(-EINVAL); + } + + if (iface == MPAM_IFACE_MMIO) + res[next_res++] = DEFINE_RES_MEM_NAMED(tbl_msc->base_address, + tbl_msc->mmio_size, + "MPAM:MSC"); + else if (iface == MPAM_IFACE_PCC) + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", + tbl_msc->base_address); + + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); + + WARN_ON_ONCE(next_res > ARRAY_SIZE(res)); + err = platform_device_add_resources(pdev, res, next_res); + if (err) + return ERR_PTR(err); + + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", + tbl_msc->max_nrdy_usec); + + /* + * The MSC's CPU affinity is described via its linked power + * management device, but only if it points at a Processor or + * Processor Container. + */ + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", acpi_id); + + WARN_ON_ONCE(next_prop > ARRAY_SIZE(props)); + err = device_create_managed_software_node(&pdev->dev, props, NULL); + if (err) + return ERR_PTR(err); + + /* + * Stash the table entry for acpi_mpam_parse_resources() to discover + * what this MSC controls. + */ + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); + if (err) + return ERR_PTR(err); + + err = platform_device_add(pdev); + if (err) + return ERR_PTR(err); + + return_ptr(pdev); +} + +static int __init acpi_mpam_parse(void) +{ + struct acpi_table_header *table __free(acpi_table) = acpi_get_table_ret(ACPI_SIG_MPAM, 0); + char *table_end, *table_offset = (char *)(table + 1); + struct acpi_mpam_msc_node *tbl_msc; + struct platform_device *pdev; + + if (acpi_disabled || !system_supports_mpam() || IS_ERR(table)) + return 0; + + if (table->revision < 1) + return 0; + + table_end = (char *)table + table->length; + + while (table_offset < table_end) { + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; + table_offset += tbl_msc->length; + + if (table_offset > table_end) { + pr_err("MSC entry overlaps end of ACPI table\n"); + return -EINVAL; + } + + /* + * If any of the reserved fields are set, make no attempt to + * parse the MSC structure. This MSC will still be counted by + * acpi_mpam_count_msc(), meaning the MPAM driver can't probe + * against all MSC, and will never be enabled. There is no way + * to enable it safely, because we cannot determine safe + * system-wide partid and pmg ranges in this situation. + */ + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) { + pr_err_once("Unrecognised MSC, MPAM not usable\n"); + pr_debug("MSC.%u: reserved field set\n", tbl_msc->identifier); + continue; + } + + if (!tbl_msc->mmio_size) { + pr_debug("MSC.%u: marked as disabled\n", tbl_msc->identifier); + continue; + } + + pdev = acpi_mpam_parse_msc(tbl_msc); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + } + + return 0; +} + +/** + * acpi_mpam_count_msc() - Count the number of MSC described by firmware. 
+ *
+ * Returns the number of MSC, or zero for an error.
+ *
+ * This can be called before or in parallel with acpi_mpam_parse().
+ */
+int acpi_mpam_count_msc(void)
+{
+	struct acpi_table_header *table __free(acpi_table) = acpi_get_table_ret(ACPI_SIG_MPAM, 0);
+	char *table_end, *table_offset = (char *)(table + 1);
+	struct acpi_mpam_msc_node *tbl_msc;
+	int count = 0;
+
+	if (IS_ERR(table))
+		return 0;
+
+	if (table->revision < 1)
+		return 0;
+
+	table_end = (char *)table + table->length;
+
+	while (table_offset < table_end) {
+		tbl_msc = (struct acpi_mpam_msc_node *)table_offset;
+
+		if (tbl_msc->length < sizeof(*tbl_msc))
+			return -EINVAL;
+		if (tbl_msc->length > table_end - table_offset)
+			return -EINVAL;
+		table_offset += tbl_msc->length;
+
+		if (!tbl_msc->mmio_size)
+			continue;
+
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Call after ACPI devices have been created, which happens behind acpi_scan_init()
+ * called from subsys_initcall(). PCC requires the mailbox driver, which is
+ * initialised from postcore_initcall().
+ */
+subsys_initcall_sync(acpi_mpam_parse);
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 002c3dde283ff..b6b1bf0bdd212 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1344,8 +1344,8 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *highest_reg, *lowest_reg, *lowest_non_linear_reg, *nominal_reg, *guaranteed_reg, - *low_freq_reg = NULL, *nom_freq_reg = NULL; - u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0; + *low_freq_reg = NULL, *nom_freq_reg = NULL, *auto_sel_reg = NULL; + u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0, auto_sel = 0; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0, regs_in_pcc = 0; @@ -1362,11 +1362,12 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ]; nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ]; guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF]; + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) || - CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg)) { + CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) || CPC_IN_PCC(auto_sel_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; } @@ -1414,6 +1415,9 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) perf_caps->lowest_freq = low_f; perf_caps->nominal_freq = nom_f; + if (CPC_SUPPORTED(auto_sel_reg)) + cpc_read(cpunum, auto_sel_reg, &auto_sel); + perf_caps->auto_sel = (bool)auto_sel; out_err: if (regs_in_pcc) @@ -1555,6 +1559,8 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) struct cpc_register_resource *auto_sel_reg; struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; + bool autosel_support_in_ffh_or_sysmem; + bool epp_support_in_ffh_or_sysmem; int ret; if (!cpc_desc) { @@ -1565,6 +1571,11 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + epp_support_in_ffh_or_sysmem = CPC_SUPPORTED(epp_set_reg) && + (CPC_IN_FFH(epp_set_reg) ||
CPC_IN_SYSTEM_MEMORY(epp_set_reg)); + autosel_support_in_ffh_or_sysmem = CPC_SUPPORTED(auto_sel_reg) && + (CPC_IN_FFH(auto_sel_reg) || CPC_IN_SYSTEM_MEMORY(auto_sel_reg)); + if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); @@ -1589,14 +1600,29 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) /* after writing CPC, transfer the ownership of PCC to platform */ ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); up_write(&pcc_ss_data->pcc_lock); - } else if (osc_cpc_flexible_adr_space_confirmed && - CPC_SUPPORTED(epp_set_reg) && CPC_IN_FFH(epp_set_reg)) { - ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + } else if (osc_cpc_flexible_adr_space_confirmed) { + if (!epp_support_in_ffh_or_sysmem && !autosel_support_in_ffh_or_sysmem) { + ret = -EOPNOTSUPP; + } else { + if (autosel_support_in_ffh_or_sysmem) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + if (epp_support_in_ffh_or_sysmem) { + ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + if (ret) + return ret; + } + } } else { - ret = -ENOTSUPP; - pr_debug("_CPC in PCC and _CPC in FFH are not supported\n"); + ret = -EOPNOTSUPP; } + if (ret == -EOPNOTSUPP) + pr_debug("_CPC in PCC and _CPC in FFH are not supported\n"); + return ret; } EXPORT_SYMBOL_GPL(cppc_set_epp_perf); @@ -1608,7 +1634,7 @@ EXPORT_SYMBOL_GPL(cppc_set_epp_perf); */ int cppc_set_epp(int cpu, u64 epp_val) { - if (epp_val > CPPC_ENERGY_PERF_MAX) + if (epp_val > CPPC_EPP_ENERGY_EFFICIENCY_PREF) return -EINVAL; return cppc_set_reg_val(cpu, ENERGY_PERF, epp_val); @@ -1732,6 +1758,158 @@ int cppc_set_enable(int cpu, bool enable) } EXPORT_SYMBOL_GPL(cppc_set_enable); +/** + * cppc_get_min_perf - Get the min performance register value. + * @cpu: CPU from which to get min performance. + * @min_perf: Return address. + * + * Return: 0 for success, -EIO on register access failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_min_perf(int cpu, u64 *min_perf) +{ + return cppc_get_reg_val(cpu, MIN_PERF, min_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_min_perf); + +/** + * cppc_set_min_perf() - Write the min performance register. + * @cpu: CPU on which to write register. + * @min_perf: Value to write to the MIN_PERF register. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_set_min_perf(int cpu, u64 min_perf) +{ + return cppc_set_reg_val(cpu, MIN_PERF, min_perf); +} +EXPORT_SYMBOL_GPL(cppc_set_min_perf); + +/** + * cppc_get_max_perf - Get the max performance register value. + * @cpu: CPU from which to get max performance. + * @max_perf: Return address. + * + * Return: 0 for success, -EIO on register access failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_max_perf(int cpu, u64 *max_perf) +{ + return cppc_get_reg_val(cpu, MAX_PERF, max_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_max_perf); + +/** + * cppc_set_max_perf() - Write the max performance register. + * @cpu: CPU on which to write register. + * @max_perf: Value to write to the MAX_PERF register. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_set_max_perf(int cpu, u64 max_perf) +{ + return cppc_set_reg_val(cpu, MAX_PERF, max_perf); +} +EXPORT_SYMBOL_GPL(cppc_set_max_perf); + +/** + * cppc_get_perf_limited - Get the Performance Limited register value. + * @cpu: CPU from which to get Performance Limited register. + * @perf_limited: Pointer to store the Performance Limited value. 
+ * + * Return: 0 for success, -EIO on register access failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return cppc_get_reg_val(cpu, PERF_LIMITED, perf_limited); +} +EXPORT_SYMBOL_GPL(cppc_get_perf_limited); + +/** + * cppc_set_perf_limited() - Write the Performance Limited register. + * @cpu: CPU on which to write register. + * @perf_limited: Value to write to the perf_limited register. + * + * Return: 0 for success, -EIO on register access failure, -EOPNOTSUPP if not supported. + */ +int cppc_set_perf_limited(int cpu, u64 perf_limited) +{ + return cppc_set_reg_val(cpu, PERF_LIMITED, perf_limited); +} +EXPORT_SYMBOL_GPL(cppc_set_perf_limited); + +/** + * cppc_get_perf - Get a CPU's performance controls. + * @cpu: CPU for which to get performance controls. + * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h + * + * Return: 0 for success with perf_ctrls, -ERRNO otherwise. + */ +int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cpc_register_resource *desired_perf_reg, *min_perf_reg, *max_perf_reg, + *energy_perf_reg; + u64 desired_perf = 0, min = 0, max = 0, energy_perf = 0; + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0, regs_in_pcc = 0; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + if (!perf_ctrls) { + pr_debug("Invalid perf_ctrls pointer\n"); + return -EINVAL; + } + + desired_perf_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + min_perf_reg = &cpc_desc->cpc_regs[MIN_PERF]; + max_perf_reg = &cpc_desc->cpc_regs[MAX_PERF]; + energy_perf_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + + /* Are any of the regs PCC ?*/ + if (CPC_IN_PCC(desired_perf_reg) || CPC_IN_PCC(min_perf_reg) || + CPC_IN_PCC(max_perf_reg) || CPC_IN_PCC(energy_perf_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); + return -ENODEV; + } + pcc_ss_data = pcc_data[pcc_ss_id]; + regs_in_pcc = 1; + down_write(&pcc_ss_data->pcc_lock); + /* Ring doorbell once to update PCC subspace */ + if (send_pcc_cmd(pcc_ss_id, CMD_READ) < 0) { + pr_debug("Failed to send PCC command for CPU:%d, ret:%d\n", cpu, ret); + ret = -EIO; + goto out_err; + } + } + + /* Read optional elements if present */ + if (CPC_SUPPORTED(max_perf_reg)) + cpc_read(cpu, max_perf_reg, &max); + perf_ctrls->max_perf = max; + + if (CPC_SUPPORTED(min_perf_reg)) + cpc_read(cpu, min_perf_reg, &min); + perf_ctrls->min_perf = min; + + if (CPC_SUPPORTED(desired_perf_reg)) + cpc_read(cpu, desired_perf_reg, &desired_perf); + perf_ctrls->desired_perf = desired_perf; + + if (CPC_SUPPORTED(energy_perf_reg)) + cpc_read(cpu, energy_perf_reg, &energy_perf); + perf_ctrls->energy_perf = energy_perf; + +out_err: + if (regs_in_pcc) + up_write(&pcc_ss_data->pcc_lock); + return ret; +} +EXPORT_SYMBOL_GPL(cppc_get_perf); + /** * cppc_set_perf - Set a CPU's performance controls. * @cpu: CPU for which to set performance controls. 
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 54676e3d82dd5..4adb3de10c3de 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -21,6 +21,15 @@ #include #include +#define for_each_acpi_pptt_entry(table, entry) \ + for ((entry = ACPI_ADD_PTR(struct acpi_subtable_header, table, \ + sizeof(struct acpi_table_pptt))); \ + ((unsigned long)entry + sizeof(struct acpi_subtable_header)) \ + <= ((unsigned long)table + table->length); \ + (entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, \ + (entry)->length))) + + static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, u32 pptt_ref) { @@ -177,14 +186,14 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, } /** - * acpi_count_levels() - Given a PPTT table, and a CPU node, count the cache - * levels and split cache levels (data/instruction). + * acpi_count_levels() - Given a PPTT table, and a CPU node, count the + * total number of levels and split cache levels (data/instruction). * @table_hdr: Pointer to the head of the PPTT table * @cpu_node: processor node we wish to count caches for - * @levels: Number of levels if success. * @split_levels: Number of split cache levels (data/instruction) if * success. Can by NULL. * + * Return: number of levels. * Given a processor node containing a processing unit, walk into it and count * how many levels exist solely for it, and then walk up each level until we hit * the root node (ignore the package level because it may be possible to have @@ -192,14 +201,18 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, * split cache levels (data/instruction) that exist at each level on the way * up. */ -static void acpi_count_levels(struct acpi_table_header *table_hdr, - struct acpi_pptt_processor *cpu_node, - unsigned int *levels, unsigned int *split_levels) +static int acpi_count_levels(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node, + unsigned int *split_levels) { + int starting_level = 0; + do { - acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0); + acpi_find_cache_level(table_hdr, cpu_node, &starting_level, split_levels, 0, 0); cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); } while (cpu_node); + + return starting_level; } /** @@ -217,22 +230,16 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, struct acpi_pptt_processor *node) { struct acpi_subtable_header *entry; - unsigned long table_end; u32 node_entry; struct acpi_pptt_processor *cpu_node; - u32 proc_sz; if (table_hdr->revision > 1) return (node->flags & ACPI_PPTT_ACPI_LEAF_NODE); - table_end = (unsigned long)table_hdr + table_hdr->length; node_entry = ACPI_PTR_DIFF(node, table_hdr); - entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, - sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor); /* ignore subtable types that are smaller than a processor node */ - while ((unsigned long)entry + proc_sz <= table_end) { + for_each_acpi_pptt_entry(table_hdr, entry) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && @@ -240,9 +247,6 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, return 0; if (entry->length == 0) return 0; - - entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, - entry->length); } return 1; } @@ -270,12 +274,10 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he u32 proc_sz; table_end = (unsigned long)table_hdr + table_hdr->length; - entry = 
ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, - sizeof(struct acpi_table_pptt)); proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ - while ((unsigned long)entry + proc_sz <= table_end) { + for_each_acpi_pptt_entry(table_hdr, entry) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->length == 0) { @@ -290,9 +292,6 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he acpi_pptt_leaf_node(table_hdr, cpu_node)) { return (struct acpi_pptt_processor *)entry; } - - entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, - entry->length); } return NULL; @@ -346,6 +345,27 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta return found; } +static struct acpi_pptt_cache * +acpi_find_any_type_cache_node(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id, unsigned int level, + struct acpi_pptt_processor **node) +{ + struct acpi_pptt_cache *cache; + + cache = acpi_find_cache_node(table_hdr, acpi_cpu_id, CACHE_TYPE_UNIFIED, + level, node); + if (cache) + return cache; + + cache = acpi_find_cache_node(table_hdr, acpi_cpu_id, CACHE_TYPE_DATA, + level, node); + if (cache) + return cache; + + return acpi_find_cache_node(table_hdr, acpi_cpu_id, CACHE_TYPE_INST, + level, node); +} + /** * update_cache_properties() - Update cacheinfo for the given processor * @this_leaf: Kernel cache info structure being updated @@ -645,7 +665,7 @@ int acpi_get_cache_info(unsigned int cpu, unsigned int *levels, if (!cpu_node) return -ENOENT; - acpi_count_levels(table, cpu_node, levels, split_levels); + *levels = acpi_count_levels(table, cpu_node, split_levels); pr_debug("Cache Setup: last_level=%d split_levels=%d\n", *levels, split_levels ? *split_levels : -1); @@ -817,3 +837,201 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu) return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, ACPI_PPTT_ACPI_IDENTICAL); } + +/** + * acpi_pptt_get_child_cpus() - Find all the CPUs below a PPTT processor node + * @table_hdr: A reference to the PPTT table. + * @parent_node: A pointer to the processor node in the @table_hdr. + * @cpus: A cpumask to fill with the CPUs below @parent_node. + * + * Walks up the PPTT from every possible CPU to find if the provided + * @parent_node is a parent of this CPU. + */ +static void acpi_pptt_get_child_cpus(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *parent_node, + cpumask_t *cpus) +{ + struct acpi_pptt_processor *cpu_node; + u32 acpi_id; + int cpu; + + cpumask_clear(cpus); + + for_each_possible_cpu(cpu) { + acpi_id = get_acpi_id_for_cpu(cpu); + cpu_node = acpi_find_processor_node(table_hdr, acpi_id); + + while (cpu_node) { + if (cpu_node == parent_node) { + cpumask_set_cpu(cpu, cpus); + break; + } + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } + } +} + +/** + * acpi_pptt_get_cpus_from_container() - Populate a cpumask with all CPUs in a + * processor container + * @acpi_cpu_id: The UID of the processor container. + * @cpus: The resulting CPU mask. + * + * Find the specified Processor Container, and fill @cpus with all the cpus + * below it. + * + * Not all 'Processor' entries in the PPTT are either a CPU or a Processor + * Container, they may exist purely to describe a Private resource. CPUs + * have to be leaves, so a Processor Container is a non-leaf that has the + * 'ACPI Processor ID valid' flag set. 
+ */ +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) +{ + struct acpi_table_header *table_hdr; + struct acpi_subtable_header *entry; + + cpumask_clear(cpus); + + table_hdr = acpi_get_pptt(); + if (!table_hdr) + return; + + for_each_acpi_pptt_entry(table_hdr, entry) { + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR) { + struct acpi_pptt_processor *cpu_node; + + cpu_node = (struct acpi_pptt_processor *)entry; + if (cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID && + !acpi_pptt_leaf_node(table_hdr, cpu_node) && + cpu_node->acpi_processor_id == acpi_cpu_id) { + acpi_pptt_get_child_cpus(table_hdr, cpu_node, cpus); + break; + } + } + } +} + +/* + * find_acpi_cache_level_from_id() - Get the level of the specified cache + * @cache_id: The id field of the cache + * + * Determine the level relative to any CPU for the cache identified by + * cache_id. This allows the property to be found even if the CPUs are offline. + * + * The returned level can be used to group caches that are peers. + * + * The PPTT table must be rev 3 or later. + * + * If one CPU's L2 is shared with another CPU as L3, this function will return + * an unpredictable value. + * + * Return: -ENOENT if the PPTT doesn't exist, the revision isn't supported or + * the cache cannot be found. + * Otherwise returns a value which represents the level of the specified cache. + */ +int find_acpi_cache_level_from_id(u32 cache_id) +{ + int level, cpu; + u32 acpi_cpu_id; + struct acpi_pptt_cache *cache; + struct acpi_table_header *table; + struct acpi_pptt_cache_v1 *cache_v1; + struct acpi_pptt_processor *cpu_node; + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + acpi_cpu_id = get_acpi_id_for_cpu(cpu); + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + /* Start at 1 for L1 */ + level = 1; + cache = acpi_find_any_type_cache_node(table, acpi_cpu_id, level, + &cpu_node); + while (cache) { + cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, + cache, sizeof(*cache)); + + if (cache->flags & ACPI_PPTT_CACHE_ID_VALID && + cache_v1->cache_id == cache_id) + return level; + + level++; + cache = acpi_find_any_type_cache_node(table, acpi_cpu_id, + level, &cpu_node); + } + } + + return -ENOENT; +} + +/** + * acpi_pptt_get_cpumask_from_cache_id() - Get the cpus associated with the + * specified cache + * @cache_id: The id field of the cache + * @cpus: Where to build the cpumask + * + * Determine which CPUs are below this cache in the PPTT. This allows the property + * to be found even if the CPUs are offline. + * + * The PPTT table must be rev 3 or later, + * + * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. + * Otherwise returns 0 and sets the cpus in the provided cpumask. 
+ */ +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) +{ + int level, cpu; + u32 acpi_cpu_id; + struct acpi_pptt_cache *cache; + struct acpi_table_header *table; + struct acpi_pptt_cache_v1 *cache_v1; + struct acpi_pptt_processor *cpu_node; + + cpumask_clear(cpus); + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + acpi_cpu_id = get_acpi_id_for_cpu(cpu); + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + /* Start at 1 for L1 */ + level = 1; + cache = acpi_find_any_type_cache_node(table, acpi_cpu_id, level, + &cpu_node); + while (cache) { + cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, + cache, sizeof(*cache)); + + if (cache->flags & ACPI_PPTT_CACHE_ID_VALID && + cache_v1->cache_id == cache_id) + cpumask_set_cpu(cpu, cpus); + + level++; + cache = acpi_find_any_type_cache_node(table, acpi_cpu_id, + level, &cpu_node); + } + } + + return 0; +} diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index fa9bb8c8ce953..f0cf9d7562e08 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -408,7 +408,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __nonstring_array __initconst ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI, - ACPI_SIG_NBFT }; + ACPI_SIG_NBFT, ACPI_SIG_MPAM }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 9a16b4e7c2fc3..1d0094c3d8729 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -210,8 +210,7 @@ static bool match_cache_node(struct device_node *cpu, #define arch_compact_of_hwid(_x) (_x) #endif -static void cache_of_set_id(struct cacheinfo *this_leaf, - struct device_node *cache_node) +u32 cache_of_calculate_id(struct device_node *cache_node) { struct device_node *cpu; u32 min_id = ~0; @@ -222,15 +221,23 @@ static void cache_of_set_id(struct cacheinfo *this_leaf, id = arch_compact_of_hwid(id); if (FIELD_GET(GENMASK_ULL(63, 32), id)) { of_node_put(cpu); - return; + return ~0; } if (match_cache_node(cpu, cache_node)) min_id = min(min_id, id); } - if (min_id != ~0) { - this_leaf->id = min_id; + return min_id; +} + +static void cache_of_set_id(struct cacheinfo *this_leaf, + struct device_node *cache_node) +{ + u32 id = cache_of_calculate_id(cache_node); + + if (id != ~0) { + this_leaf->id = id; this_leaf->attributes |= CACHE_ID; } } diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index dd8efe4fb967f..08d3fe50d3f79 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -23,11 +23,17 @@ #include #include +#include #include static struct cpufreq_driver cppc_cpufreq_driver; +static DEFINE_MUTEX(cppc_cpufreq_update_autosel_config_lock); + +/* Autonomous Selection */ +static bool auto_sel_mode; + #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE static enum { FIE_UNSET = -1, @@ -272,8 +278,13 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, freqs.old = policy->cur; freqs.new = target_freq; + /* + * In autonomous selection mode, hardware handles frequency scaling directly + * based on workload and EPP hints. So, skip the OS frequency set requests.
+ */ cpufreq_freq_transition_begin(policy, &freqs); - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + if (!cpu_data->perf_caps.auto_sel) + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); cpufreq_freq_transition_end(policy, &freqs, ret != 0); if (ret) @@ -565,6 +576,12 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) goto free_mask; } + ret = cppc_get_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err reading CPU%d perf ctrls: ret:%d\n", cpu, ret); + goto free_mask; + } + return cpu_data; free_mask: @@ -584,11 +601,163 @@ static void cppc_cpufreq_put_cpu_data(struct cpufreq_policy *policy) policy->driver_data = NULL; } +/** + * cppc_cpufreq_set_mperf_limit - Generic function to set min/max performance limit + * @policy: cpufreq policy + * @val: performance value to set + * @update_reg: whether to update hardware register + * @update_policy: whether to update policy constraints + * @is_min: true for min_perf, false for max_perf + */ +static int cppc_cpufreq_set_mperf_limit(struct cpufreq_policy *policy, u64 val, + bool update_reg, bool update_policy, bool is_min) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; + unsigned int cpu = policy->cpu; + struct freq_qos_request *req; + unsigned int freq; + u32 perf; + int ret; + + perf = clamp(val, caps->lowest_perf, caps->highest_perf); + freq = cppc_perf_to_khz(caps, perf); + + pr_debug("cpu%d, %s_perf:%llu, update_reg:%d, update_policy:%d\n", cpu, + is_min ? "min" : "max", (u64)perf, update_reg, update_policy); + + guard(mutex)(&cppc_cpufreq_update_autosel_config_lock); + + if (update_reg) { + ret = is_min ? cppc_set_min_perf(cpu, perf) : cppc_set_max_perf(cpu, perf); + if (ret) { + if (ret != -EOPNOTSUPP) + pr_warn("Failed to set %s_perf (%llu) on CPU%d (%d)\n", + is_min ? "min" : "max", (u64)perf, cpu, ret); + return ret; + } + + if (is_min) + cpu_data->perf_ctrls.min_perf = perf; + else + cpu_data->perf_ctrls.max_perf = perf; + } + + if (update_policy) { + req = is_min ? policy->min_freq_req : policy->max_freq_req; + + ret = freq_qos_update_request(req, freq); + if (ret < 0) { + pr_warn("Failed to update %s_freq constraint for CPU%d: %d\n", + is_min ? 
"min" : "max", cpu, ret); + return ret; + } + } + + return 0; +} + +#define cppc_cpufreq_set_min_perf(policy, val, update_reg, update_policy) \ + cppc_cpufreq_set_mperf_limit(policy, val, update_reg, update_policy, true) + +#define cppc_cpufreq_set_max_perf(policy, val, update_reg, update_policy) \ + cppc_cpufreq_set_mperf_limit(policy, val, update_reg, update_policy, false) + +static int cppc_cpufreq_update_autosel_val(struct cpufreq_policy *policy, bool auto_sel) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int cpu = policy->cpu; + int ret; + + pr_debug("cpu%d, auto_sel curr:%u, new:%d\n", cpu, cpu_data->perf_caps.auto_sel, auto_sel); + + guard(mutex)(&cppc_cpufreq_update_autosel_config_lock); + + ret = cppc_set_auto_sel(cpu, auto_sel); + if (ret) { + pr_warn("Failed to set auto_sel=%d for CPU%d (%d)\n", auto_sel, cpu, ret); + return ret; + } + cpu_data->perf_caps.auto_sel = auto_sel; + + return 0; +} + +static int cppc_cpufreq_update_epp_val(struct cpufreq_policy *policy, u32 epp) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int cpu = policy->cpu; + int ret; + + pr_debug("cpu%d, epp curr:%u, new:%u\n", cpu, cpu_data->perf_ctrls.energy_perf, epp); + + guard(mutex)(&cppc_cpufreq_update_autosel_config_lock); + + ret = cppc_set_epp(cpu, epp); + if (ret) { + pr_warn("failed to set energy_perf for cpu:%d (%d)\n", cpu, ret); + return ret; + } + cpu_data->perf_ctrls.energy_perf = epp; + + return 0; +} + +/** + * cppc_cpufreq_update_autosel_config - Update Autonomous selection configuration + * @policy: cpufreq policy for the CPU + * @min_perf: minimum performance value to set + * @max_perf: maximum performance value to set + * @auto_sel: autonomous selection mode enable/disable (also controls min/max perf reg updates) + * @epp_val: energy performance preference value + * @update_epp: whether to update EPP register + * @update_policy: whether to update policy constraints + * + * Return: 0 on success, negative error code on failure + */ +static int cppc_cpufreq_update_autosel_config(struct cpufreq_policy *policy, + u64 min_perf, u64 max_perf, bool auto_sel, + u32 epp_val, bool update_epp, bool update_policy) +{ + const unsigned int cpu = policy->cpu; + int ret; + + /* + * Set min/max performance registers and update policy constraints. + * When enabling: update both registers and policy. + * When disabling: update policy only. + * Continue even if min/max are not supported, as EPP and autosel + * might still be supported. 
+ */ + ret = cppc_cpufreq_set_min_perf(policy, min_perf, auto_sel, update_policy); + if (ret && ret != -EOPNOTSUPP) + return ret; + + ret = cppc_cpufreq_set_max_perf(policy, max_perf, auto_sel, update_policy); + if (ret && ret != -EOPNOTSUPP) + return ret; + + if (update_epp) { + ret = cppc_cpufreq_update_epp_val(policy, epp_val); + if (ret) + return ret; + } + + ret = cppc_cpufreq_update_autosel_val(policy, auto_sel); + if (ret) + return ret; + + pr_debug("Updated autonomous config [%llu-%llu] for CPU%d\n", min_perf, max_perf, cpu); + + return 0; +} + static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; struct cppc_cpudata *cpu_data; struct cppc_perf_caps *caps; + u64 min_perf, max_perf; int ret; cpu_data = cppc_cpufreq_get_cpu_data(cpu); @@ -652,11 +821,31 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cur = cppc_perf_to_khz(caps, caps->highest_perf); cpu_data->perf_ctrls.desired_perf = caps->highest_perf; - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) { - pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - caps->highest_perf, cpu, ret); - goto out; + if (cpu_data->perf_caps.auto_sel) { + ret = cppc_set_enable(cpu, true); + if (ret) { + pr_err("Failed to enable CPPC on cpu%d (%d)\n", cpu, ret); + goto out; + } + + min_perf = cpu_data->perf_ctrls.min_perf ? + cpu_data->perf_ctrls.min_perf : caps->lowest_nonlinear_perf; + max_perf = cpu_data->perf_ctrls.max_perf ? + cpu_data->perf_ctrls.max_perf : caps->nominal_perf; + + ret = cppc_cpufreq_update_autosel_config(policy, min_perf, max_perf, true, + CPPC_EPP_PERFORMANCE_PREF, true, false); + if (ret) { + cppc_set_enable(cpu, false); + goto out; + } + } else { + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err setting perf value:%d on CPU:%d. 
ret:%d\n", + caps->highest_perf, cpu, ret); + goto out; + } } cppc_cpufreq_cpu_fie_init(policy); @@ -821,8 +1010,30 @@ static ssize_t show_auto_select(struct cpufreq_policy *policy, char *buf) return sysfs_emit(buf, "%d\n", val); } -static ssize_t store_auto_select(struct cpufreq_policy *policy, - const char *buf, size_t count) +/** + * cppc_cpufreq_update_auto_select - Update autonomous selection config for policy->cpu + * @policy: cpufreq policy + * @enable: enable/disable autonomous selection + */ +static int cppc_cpufreq_update_auto_select(struct cpufreq_policy *policy, bool enable) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + struct cppc_perf_caps *caps = &cpu_data->perf_caps; + u64 min_perf = caps->lowest_nonlinear_perf; + u64 max_perf = caps->nominal_perf; + + if (enable) { + if (cpu_data->perf_ctrls.min_perf) + min_perf = cpu_data->perf_ctrls.min_perf; + if (cpu_data->perf_ctrls.max_perf) + max_perf = cpu_data->perf_ctrls.max_perf; + } + + return cppc_cpufreq_update_autosel_config(policy, min_perf, max_perf, enable, + 0, false, true); +} + +static ssize_t store_auto_select(struct cpufreq_policy *policy, const char *buf, size_t count) { bool val; int ret; @@ -831,91 +1042,186 @@ static ssize_t store_auto_select(struct cpufreq_policy *policy, if (ret) return ret; - ret = cppc_set_auto_sel(policy->cpu, val); + ret = cppc_cpufreq_update_auto_select(policy, val); if (ret) return ret; return count; } -static ssize_t show_auto_act_window(struct cpufreq_policy *policy, char *buf) +static ssize_t cppc_cpufreq_sysfs_show_u64(unsigned int cpu, int (*get_func)(int, u64 *), char *buf) +{ + u64 val; + int ret = get_func(cpu, &val); + + if (ret == -EOPNOTSUPP) + return sysfs_emit(buf, "\n"); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", val); +} + +static ssize_t cppc_cpufreq_sysfs_store_u64(unsigned int cpu, int (*set_func)(int, u64), + const char *buf, size_t count) { u64 val; int ret; - ret = cppc_get_auto_act_window(policy->cpu, &val); + ret = kstrtou64(buf, 0, &val); + if (ret) + return ret; - /* show "" when this register is not supported by cpc */ + ret = set_func((int)cpu, val); + + return ret ? ret : count; +} + +static ssize_t show_auto_act_window(struct cpufreq_policy *policy, char *buf) +{ + return cppc_cpufreq_sysfs_show_u64(policy->cpu, cppc_get_auto_act_window, buf); +} + +static ssize_t store_auto_act_window(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + return cppc_cpufreq_sysfs_store_u64(policy->cpu, cppc_set_auto_act_window, buf, count); +} + +static ssize_t show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf) +{ + return cppc_cpufreq_sysfs_show_u64(policy->cpu, cppc_get_epp_perf, buf); +} + +static ssize_t store_energy_performance_preference_val(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + return cppc_cpufreq_sysfs_store_u64(policy->cpu, cppc_set_epp, buf, count); +} + +/** + * show_min_perf - Show minimum performance as frequency (kHz) + * + * Reads the MIN_PERF register and converts the performance value to + * frequency (kHz) for user-space consumption. 
+ */ +static ssize_t show_min_perf(struct cpufreq_policy *policy, char *buf) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + u64 perf; + int ret; + + ret = cppc_get_min_perf(policy->cpu, &perf); if (ret == -EOPNOTSUPP) return sysfs_emit(buf, "\n"); - if (ret) return ret; - return sysfs_emit(buf, "%llu\n", val); + /* Convert performance to frequency (kHz) for user */ + return sysfs_emit(buf, "%u\n", cppc_perf_to_khz(&cpu_data->perf_caps, perf)); } -static ssize_t store_auto_act_window(struct cpufreq_policy *policy, - const char *buf, size_t count) +/** + * store_min_perf - Set minimum performance from frequency (kHz) + * + * Converts the user-provided frequency (kHz) to a performance value + * and writes it to the MIN_PERF register. + */ +static ssize_t store_min_perf(struct cpufreq_policy *policy, const char *buf, size_t count) { - u64 usec; + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int freq_khz; + u64 perf; int ret; - ret = kstrtou64(buf, 0, &usec); + ret = kstrtouint(buf, 0, &freq_khz); if (ret) return ret; - ret = cppc_set_auto_act_window(policy->cpu, usec); + /* Convert frequency (kHz) to performance value */ + perf = cppc_khz_to_perf(&cpu_data->perf_caps, freq_khz); + + ret = cppc_cpufreq_set_min_perf(policy, perf, true, cpu_data->perf_caps.auto_sel); if (ret) return ret; return count; } -static ssize_t show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf) +/** + * show_max_perf - Show maximum performance as frequency (kHz) + * + * Reads the MAX_PERF register and converts the performance value to + * frequency (kHz) for user-space consumption. + */ +static ssize_t show_max_perf(struct cpufreq_policy *policy, char *buf) { - u64 val; + struct cppc_cpudata *cpu_data = policy->driver_data; + u64 perf; int ret; - ret = cppc_get_epp_perf(policy->cpu, &val); - - /* show "" when this register is not supported by cpc */ + ret = cppc_get_max_perf(policy->cpu, &perf); if (ret == -EOPNOTSUPP) return sysfs_emit(buf, "\n"); - if (ret) return ret; - return sysfs_emit(buf, "%llu\n", val); + /* Convert performance to frequency (kHz) for user */ + return sysfs_emit(buf, "%u\n", cppc_perf_to_khz(&cpu_data->perf_caps, perf)); } -static ssize_t store_energy_performance_preference_val(struct cpufreq_policy *policy, - const char *buf, size_t count) +/** + * store_max_perf - Set maximum performance from frequency (kHz) + * + * Converts the user-provided frequency (kHz) to a performance value + * and writes it to the MAX_PERF register. 
+ */ +static ssize_t store_max_perf(struct cpufreq_policy *policy, const char *buf, size_t count) { - u64 val; + struct cppc_cpudata *cpu_data = policy->driver_data; + unsigned int freq_khz; + u64 perf; int ret; - ret = kstrtou64(buf, 0, &val); + ret = kstrtouint(buf, 0, &freq_khz); if (ret) return ret; - ret = cppc_set_epp(policy->cpu, val); + /* Convert frequency (kHz) to performance value */ + perf = cppc_khz_to_perf(&cpu_data->perf_caps, freq_khz); + + ret = cppc_cpufreq_set_max_perf(policy, perf, true, cpu_data->perf_caps.auto_sel); if (ret) return ret; return count; } +static ssize_t show_perf_limited(struct cpufreq_policy *policy, char *buf) +{ + return cppc_cpufreq_sysfs_show_u64(policy->cpu, cppc_get_perf_limited, buf); +} + +static ssize_t store_perf_limited(struct cpufreq_policy *policy, const char *buf, size_t count) +{ + return cppc_cpufreq_sysfs_store_u64(policy->cpu, cppc_set_perf_limited, buf, count); +} + cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); cpufreq_freq_attr_rw(auto_act_window); cpufreq_freq_attr_rw(energy_performance_preference_val); +cpufreq_freq_attr_rw(min_perf); +cpufreq_freq_attr_rw(max_perf); +cpufreq_freq_attr_rw(perf_limited); static struct freq_attr *cppc_cpufreq_attr[] = { &freqdomain_cpus, &auto_select, &auto_act_window, &energy_performance_preference_val, + &min_perf, + &max_perf, + &perf_limited, NULL, }; @@ -932,13 +1238,61 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .name = "cppc_cpufreq", }; +static int cppc_cpufreq_set_epp_autosel_allcpus(bool auto_sel, u64 epp) +{ + int cpu, ret; + + for_each_present_cpu(cpu) { + ret = cppc_set_epp(cpu, epp); + if (ret) { + pr_warn("Failed to set EPP on CPU%d (%d)\n", cpu, ret); + goto disable_all; + } + + ret = cppc_set_auto_sel(cpu, auto_sel); + if (ret) { + pr_warn("Failed to set auto_sel on CPU%d (%d)\n", cpu, ret); + goto disable_all; + } + } + + return 0; + +disable_all: + pr_warn("Disabling auto_sel for all CPUs\n"); + for_each_present_cpu(cpu) + cppc_set_auto_sel(cpu, false); + + return -EIO; +} + static int __init cppc_cpufreq_init(void) { + bool auto_sel; int ret; if (!acpi_cpc_valid()) return -ENODEV; + if (auto_sel_mode) { + /* + * Check if autonomous selection is supported by testing CPU 0. + * If supported, enable autonomous mode on all CPUs. 
+ */ + ret = cppc_get_auto_sel(0, &auto_sel); + if (!ret) { + pr_info("Enabling auto_sel_mode (autonomous selection mode)\n"); + ret = cppc_cpufreq_set_epp_autosel_allcpus(true, CPPC_EPP_PERFORMANCE_PREF); + if (ret) { + pr_warn("Disabling auto_sel_mode, fallback to standard\n"); + auto_sel_mode = false; + } + } else { + pr_warn("Disabling auto_sel_mode as not supported by hardware\n"); + auto_sel_mode = false; + } + } + cppc_freq_invariance_init(); populate_efficiency_class(); @@ -951,10 +1305,19 @@ static int __init cppc_cpufreq_init(void) static void __exit cppc_cpufreq_exit(void) { + int cpu; + + for_each_present_cpu(cpu) + cppc_set_auto_sel(cpu, false); + auto_sel_mode = false; + cpufreq_unregister_driver(&cppc_cpufreq_driver); cppc_freq_invariance_exit(); } +module_param(auto_sel_mode, bool, 0000); +MODULE_PARM_DESC(auto_sel_mode, "Enable Autonomous Performance Level Selection"); + module_exit(cppc_cpufreq_exit); MODULE_AUTHOR("Ashwin Chaugule"); MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec"); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e9e1d555cec65..e930191057c04 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -413,7 +413,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, * is not set. */ if (cxled->part < 0) - for (int i = 0; cxlds->nr_partitions; i++) + for (int i = 0; i < cxlds->nr_partitions; i++) if (resource_contains(&cxlds->part[i].res, res)) { cxled->part = i; break; @@ -1046,13 +1046,14 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return -ENXIO; } + port->commit_end = cxld->id; + if (size == 0) { - dev_warn(&port->dev, + dev_dbg(&port->dev, "decoder%d.%d: Committed with zero size\n", port->id, cxld->id); - return -ENXIO; + return -ENOSPC; } - port->commit_end = cxld->id; } else { if (cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); @@ -1210,6 +1211,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base, info); if (rc) { + if (rc == -ENOSPC) + continue; dev_warn(&port->dev, "Failed to initialize decoder%d.%d\n", port->id, i); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index be45211843282..adebbb1db5078 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3410,6 +3410,14 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, int rc, part = READ_ONCE(cxled->part); struct cxl_region *cxlr; + if (part < 0 || part >= cxlds->nr_partitions) { + dev_err(cxlmd->dev.parent, + "%s:%s: invalid partition index %d (max %u)\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + part, cxlds->nr_partitions); + return ERR_PTR(-ENXIO); + } + do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, atomic_read(&cxlrd->region_id)); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 54e219b0049ea..67ad5b007498e 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -16,19 +16,33 @@ /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define 
CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_CACHE_CAPABLE BIT(0) +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CACHE_WBI_CAPABLE BIT(6) +#define CXL_DVSEC_CXL_RST_CAPABLE BIT(7) +#define CXL_DVSEC_CXL_RST_TIMEOUT_MASK GENMASK(10, 8) +#define CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE BIT(11) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_CTRL2_OFFSET 0x10 +#define CXL_DVSEC_DISABLE_CACHING BIT(0) +#define CXL_DVSEC_INIT_CACHE_WBI BIT(1) +#define CXL_DVSEC_INIT_CXL_RESET BIT(2) +#define CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE BIT(3) +#define CXL_DVSEC_STATUS2_OFFSET 0x12 +#define CXL_DVSEC_CACHE_INVALID BIT(0) +#define CXL_DVSEC_CXL_RST_COMPLETE BIT(1) +#define CXL_DVSEC_CXL_RESET_ERR BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + ((i) * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + ((i) * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + ((i) * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + ((i) * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) #define CXL_DVSEC_RANGE_MAX 2 diff --git a/drivers/firmware/arm_scmi/Makefile b/drivers/firmware/arm_scmi/Makefile index 780cd62b2f78a..caa61f16d12fc 100644 --- a/drivers/firmware/arm_scmi/Makefile +++ b/drivers/firmware/arm_scmi/Makefile @@ -8,7 +8,7 @@ scmi-driver-$(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT) += raw_mode.o scmi-transport-$(CONFIG_ARM_SCMI_HAVE_SHMEM) = shmem.o scmi-transport-$(CONFIG_ARM_SCMI_HAVE_MSG) += msg.o scmi-protocols-y := base.o clock.o perf.o power.o reset.o sensors.o system.o voltage.o powercap.o -scmi-protocols-y += pinctrl.o +scmi-protocols-y += pinctrl.o mpam.o scmi-module-objs := $(scmi-driver-y) $(scmi-protocols-y) $(scmi-transport-y) obj-$(CONFIG_ARM_SCMI_PROTOCOL) += transports/ diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index a8f2247feab9d..f85077887225d 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -3443,6 +3443,7 @@ static int __init scmi_driver_init(void) scmi_system_register(); scmi_powercap_register(); scmi_pinctrl_register(); + scmi_mpam_register(); return platform_driver_register(&scmi_driver); } @@ -3461,6 +3462,7 @@ static void __exit scmi_driver_exit(void) scmi_system_unregister(); scmi_powercap_unregister(); scmi_pinctrl_unregister(); + scmi_mpam_unregister(); platform_driver_unregister(&scmi_driver); diff --git a/drivers/firmware/arm_scmi/mpam.c b/drivers/firmware/arm_scmi/mpam.c new file mode 100644 index 0000000000000..21a7d197ab60a --- /dev/null +++ b/drivers/firmware/arm_scmi/mpam.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * System Control and Management Interface (SCMI) MPAM Protocol + * + * Copyright (C) 2024 ARM Ltd. + */ + +#include "common.h" +#include + +#define SCMI_PROTOCOL_SUPPORTED_VERSION 0x10000 + +static int scmi_mpam_transfer_buf(const struct scmi_protocol_handle *ph, + u8 msg_id, void *msg_buf, size_t msg_len, + u32 *ret_val) +{ + int ret; + struct scmi_xfer *t; + + ret = ph->xops->xfer_get_init(ph, msg_id, msg_len, + ret_val ? 
sizeof(*ret_val) : 0, &t); + if (ret) + return ret; + + memcpy(t->tx.buf, msg_buf, msg_len); + + ret = ph->xops->do_xfer(ph, t); + if (!ret && ret_val) { + u32 value; + + memcpy(&value, t->rx.buf, sizeof(value)); + *ret_val = le32_to_cpu((__le32)value); + } + + ph->xops->xfer_put(ph, t); + + return ret; +} + +static const struct scmi_mpam_proto_ops mpam_proto_ops = { + .mpam_transfer_buf = scmi_mpam_transfer_buf, +}; + +static int scmi_mpam_protocol_init(const struct scmi_protocol_handle *ph) +{ + int ret; + u32 version; + + ret = ph->xops->version_get(ph, &version); + if (ret) + return ret; + + dev_dbg(ph->dev, "SCMI MPAM Version %d.%d\n", + PROTOCOL_REV_MAJOR(version), PROTOCOL_REV_MINOR(version)); + + return 0; +} + +static const struct scmi_protocol scmi_mpam = { + .id = SCMI_PROTOCOL_MPAM, + .owner = THIS_MODULE, + .instance_init = &scmi_mpam_protocol_init, + .ops = &mpam_proto_ops, + .supported_version = SCMI_PROTOCOL_SUPPORTED_VERSION, +}; + +DEFINE_SCMI_PROTOCOL_REGISTER_UNREGISTER(mpam, scmi_mpam) diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index d62c4469d1fd9..ad6fcfcfdd8d8 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -379,5 +379,6 @@ DECLARE_SCMI_REGISTER_UNREGISTER(sensors); DECLARE_SCMI_REGISTER_UNREGISTER(voltage); DECLARE_SCMI_REGISTER_UNREGISTER(system); DECLARE_SCMI_REGISTER_UNREGISTER(powercap); +DECLARE_SCMI_REGISTER_UNREGISTER(mpam); #endif /* _SCMI_PROTOCOLS_H */ diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 5fd3ec3e2c53d..95cd22581731b 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2016-2022 NVIDIA Corporation + * Copyright (c) 2016-2025 NVIDIA Corporation * * Author: Thierry Reding * Dipen Patel @@ -68,6 +68,30 @@ #define TEGRA186_GPIO_INTERRUPT_STATUS(x) (0x100 + (x) * 4) +/* Tegra410 GPIOs implemented by the COMPUTE GPIO controller */ +#define TEGRA410_COMPUTE_GPIO_PORT_A 0 +#define TEGRA410_COMPUTE_GPIO_PORT_B 1 +#define TEGRA410_COMPUTE_GPIO_PORT_C 2 +#define TEGRA410_COMPUTE_GPIO_PORT_D 3 +#define TEGRA410_COMPUTE_GPIO_PORT_E 4 + +/* Tegra410 GPIOs implemented by the SYSTEM GPIO controller */ +#define TEGRA410_SYSTEM_GPIO_PORT_A 0 +#define TEGRA410_SYSTEM_GPIO_PORT_B 1 +#define TEGRA410_SYSTEM_GPIO_PORT_C 2 +#define TEGRA410_SYSTEM_GPIO_PORT_D 3 +#define TEGRA410_SYSTEM_GPIO_PORT_E 4 +#define TEGRA410_SYSTEM_GPIO_PORT_I 5 +#define TEGRA410_SYSTEM_GPIO_PORT_J 6 +#define TEGRA410_SYSTEM_GPIO_PORT_K 7 +#define TEGRA410_SYSTEM_GPIO_PORT_L 8 +#define TEGRA410_SYSTEM_GPIO_PORT_M 9 +#define TEGRA410_SYSTEM_GPIO_PORT_N 10 +#define TEGRA410_SYSTEM_GPIO_PORT_P 11 +#define TEGRA410_SYSTEM_GPIO_PORT_Q 12 +#define TEGRA410_SYSTEM_GPIO_PORT_R 13 +#define TEGRA410_SYSTEM_GPIO_PORT_V 14 + struct tegra_gpio_port { const char *name; unsigned int bank; @@ -1001,14 +1025,17 @@ static int tegra186_gpio_probe(struct platform_device *pdev) return devm_gpiochip_add_data(&pdev->dev, &gpio->gpio, gpio); } -#define TEGRA186_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA186_MAIN_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ +#define TEGRA_GPIO_PORT(_prefix, _name, _bank, _port, _pins) \ + [_prefix##_GPIO_PORT_##_name] = { \ + .name = #_name, \ + .bank = _bank, \ + .port = _port, \ + .pins = _pins, \ } +#define TEGRA186_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ + 
TEGRA_GPIO_PORT(TEGRA186_MAIN, _name, _bank, _port, _pins) + static const struct tegra_gpio_port tegra186_main_ports[] = { TEGRA186_MAIN_GPIO_PORT( A, 2, 0, 7), TEGRA186_MAIN_GPIO_PORT( B, 3, 0, 7), @@ -1044,13 +1071,8 @@ static const struct tegra_gpio_soc tegra186_main_soc = { .has_vm_support = false, }; -#define TEGRA186_AON_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA186_AON_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA186_AON_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA186_AON, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra186_aon_ports[] = { TEGRA186_AON_GPIO_PORT( S, 0, 1, 5), @@ -1072,13 +1094,8 @@ static const struct tegra_gpio_soc tegra186_aon_soc = { .has_vm_support = false, }; -#define TEGRA194_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA194_MAIN_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA194_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA194_MAIN, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra194_main_ports[] = { TEGRA194_MAIN_GPIO_PORT( A, 1, 2, 8), @@ -1128,13 +1145,8 @@ static const struct tegra_gpio_soc tegra194_main_soc = { .has_vm_support = true, }; -#define TEGRA194_AON_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA194_AON_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA194_AON_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA194_AON, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra194_aon_ports[] = { TEGRA194_AON_GPIO_PORT(AA, 0, 3, 8), @@ -1154,13 +1166,8 @@ static const struct tegra_gpio_soc tegra194_aon_soc = { .has_vm_support = false, }; -#define TEGRA234_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA234_MAIN_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA234_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA234_MAIN, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra234_main_ports[] = { TEGRA234_MAIN_GPIO_PORT( A, 0, 0, 8), @@ -1199,13 +1206,8 @@ static const struct tegra_gpio_soc tegra234_main_soc = { .has_vm_support = true, }; -#define TEGRA234_AON_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA234_AON_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA234_AON_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA234_AON, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra234_aon_ports[] = { TEGRA234_AON_GPIO_PORT(AA, 0, 4, 8), @@ -1226,13 +1228,8 @@ static const struct tegra_gpio_soc tegra234_aon_soc = { .has_vm_support = false, }; -#define TEGRA241_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA241_MAIN_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - .pins = _pins, \ - } +#define TEGRA241_MAIN_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA241_MAIN, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra241_main_ports[] = { TEGRA241_MAIN_GPIO_PORT(A, 0, 0, 8), @@ -1257,13 +1254,8 @@ static const struct tegra_gpio_soc tegra241_main_soc = { .has_vm_support = false, }; -#define TEGRA241_AON_GPIO_PORT(_name, _bank, _port, _pins) \ - [TEGRA241_AON_GPIO_PORT_##_name] = { \ - .name = #_name, \ - .bank = _bank, \ - .port = _port, \ - 
.pins = _pins, \ - } +#define TEGRA241_AON_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA241_AON, _name, _bank, _port, _pins) static const struct tegra_gpio_port tegra241_aon_ports[] = { TEGRA241_AON_GPIO_PORT(AA, 0, 0, 8), @@ -1279,6 +1271,54 @@ static const struct tegra_gpio_soc tegra241_aon_soc = { .has_vm_support = false, }; +#define TEGRA410_COMPUTE_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA410_COMPUTE, _name, _bank, _port, _pins) + +static const struct tegra_gpio_port tegra410_compute_ports[] = { + TEGRA410_COMPUTE_GPIO_PORT(A, 0, 0, 3), + TEGRA410_COMPUTE_GPIO_PORT(B, 1, 0, 8), + TEGRA410_COMPUTE_GPIO_PORT(C, 1, 1, 3), + TEGRA410_COMPUTE_GPIO_PORT(D, 2, 0, 8), + TEGRA410_COMPUTE_GPIO_PORT(E, 2, 1, 8), +}; + +static const struct tegra_gpio_soc tegra410_compute_soc = { + .num_ports = ARRAY_SIZE(tegra410_compute_ports), + .ports = tegra410_compute_ports, + .name = "tegra410-gpio-compute", + .num_irqs_per_bank = 8, + .instance = 0, +}; + +#define TEGRA410_SYSTEM_GPIO_PORT(_name, _bank, _port, _pins) \ + TEGRA_GPIO_PORT(TEGRA410_SYSTEM, _name, _bank, _port, _pins) + +static const struct tegra_gpio_port tegra410_system_ports[] = { + TEGRA410_SYSTEM_GPIO_PORT(A, 0, 0, 7), + TEGRA410_SYSTEM_GPIO_PORT(B, 0, 1, 8), + TEGRA410_SYSTEM_GPIO_PORT(C, 0, 2, 8), + TEGRA410_SYSTEM_GPIO_PORT(D, 0, 3, 8), + TEGRA410_SYSTEM_GPIO_PORT(E, 0, 4, 6), + TEGRA410_SYSTEM_GPIO_PORT(I, 1, 0, 8), + TEGRA410_SYSTEM_GPIO_PORT(J, 1, 1, 7), + TEGRA410_SYSTEM_GPIO_PORT(K, 1, 2, 7), + TEGRA410_SYSTEM_GPIO_PORT(L, 1, 3, 7), + TEGRA410_SYSTEM_GPIO_PORT(M, 2, 0, 7), + TEGRA410_SYSTEM_GPIO_PORT(N, 2, 1, 6), + TEGRA410_SYSTEM_GPIO_PORT(P, 2, 2, 8), + TEGRA410_SYSTEM_GPIO_PORT(Q, 2, 3, 3), + TEGRA410_SYSTEM_GPIO_PORT(R, 2, 4, 2), + TEGRA410_SYSTEM_GPIO_PORT(V, 1, 4, 2), +}; + +static const struct tegra_gpio_soc tegra410_system_soc = { + .num_ports = ARRAY_SIZE(tegra410_system_ports), + .ports = tegra410_system_ports, + .name = "tegra410-gpio-system", + .num_irqs_per_bank = 8, + .instance = 0, +}; + static const struct of_device_id tegra186_gpio_of_match[] = { { .compatible = "nvidia,tegra186-gpio", @@ -1311,6 +1351,8 @@ static const struct acpi_device_id tegra186_gpio_acpi_match[] = { { .id = "NVDA0408", .driver_data = (kernel_ulong_t)&tegra194_aon_soc }, { .id = "NVDA0508", .driver_data = (kernel_ulong_t)&tegra241_main_soc }, { .id = "NVDA0608", .driver_data = (kernel_ulong_t)&tegra241_aon_soc }, + { .id = "NVDA0708", .driver_data = (kernel_ulong_t)&tegra410_compute_soc }, + { .id = "NVDA0808", .driver_data = (kernel_ulong_t)&tegra410_system_soc }, {} }; MODULE_DEVICE_TABLE(acpi, tegra186_gpio_acpi_match); diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 597538bfe2f83..2ce65c46c0cb3 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -2288,12 +2288,16 @@ config SENSORS_INA3221 config SENSORS_SPD5118 tristate "SPD5118 Compliant Temperature Sensors" - depends on I2C - select REGMAP_I2C + depends on I2C || I3C + select REGMAP_I2C if I2C + select REGMAP_I3C if I3C help If you say yes here you get support for SPD5118 (JEDEC JESD300) - compliant temperature sensors. Such sensors are found on DDR5 memory - modules. + compliant temperature sensors using I2C or I3C bus interface. + Such sensors are found on DDR5 memory modules. + + This driver supports both I2C and I3C interfaces. I3C devices + use 16-bit register addressing mode as specified in JESD300-5B. This driver can also be built as a module. If so, the module will be called spd5118. 
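Since the Kconfig help text above calls out the 16-bit register addressing used on the I3C interface (JESD300-5B), here is a minimal sketch of what a regmap description for that addressing mode looks like. It is illustrative only: the config name and the max_register value are assumptions and are not taken from the driver, which supplies its own regmap configurations in the spd5118.c hunk below.

#include <linux/regmap.h>

/* Illustrative only: 16-bit register addresses, 8-bit values. */
static const struct regmap_config example_spd5118_i3c_regmap_config = {
	.reg_bits = 16,		/* JESD300-5B I3C register address width */
	.val_bits = 8,
	.max_register = 0x7ff,	/* assumed upper bound for the example */
};

In a probe path such a config would be handed to devm_regmap_init_i3c(), as the spd5118 I3C probe below does with the driver's own configuration.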
diff --git a/drivers/hwmon/spd5118.c b/drivers/hwmon/spd5118.c index 5da44571b6a0c..531ef3ce32a70 100644 --- a/drivers/hwmon/spd5118.c +++ b/drivers/hwmon/spd5118.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -770,7 +771,67 @@ static struct i2c_driver spd5118_i2c_driver = { .address_list = IS_ENABLED(CONFIG_SENSORS_SPD5118_DETECT) ? normal_i2c : NULL, }; -module_i2c_driver(spd5118_i2c_driver); +/* I3C */ + +static int spd5118_i3c_probe(struct i3c_device *i3cdev) +{ + struct device *dev = i3cdev_to_dev(i3cdev); + struct regmap *regmap; + unsigned int regval; + int err; + + /* + * I3C devices use 16-bit register addressing. + * Per SPD5118 specification section 7.2, I3C interface uses + * 16-bit register address mode. + */ + regmap = devm_regmap_init_i3c(i3cdev, &spd5118_regmap8_config); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), "regmap init failed\n"); + + /* Verify this is a SPD5118 device */ + err = regmap_read(regmap, SPD5118_REG_TYPE, ®val); + if (err) + return err; + + /* Check device type - should be 0x51 in first register */ + if (regval != 0x51) + return -ENODEV; + + err = regmap_read(regmap, SPD5118_REG_TYPE + 1, ®val); + if (err) + return err; + + /* Second register should be 0x18 (combined: 0x5118) */ + if (regval != 0x18) + return -ENODEV; + + /* I3C devices always use 16-bit addressing */ + return spd5118_common_probe(dev, regmap, true); +} + +/* + * SPD5118 does not have a manufacturer/part ID defined in the + * JESD specification for I3C. We use a generic match for now. + * Devices should be instantiated via device tree or ACPI. + */ +static const struct i3c_device_id spd5118_i3c_ids[] = { + I3C_CLASS(I3C_DCR_GENERIC_DEVICE, NULL), + { } +}; +MODULE_DEVICE_TABLE(i3c, spd5118_i3c_ids); + +static struct i3c_driver spd5118_i3c_driver = { + .driver = { + .name = "spd5118_i3c", + .of_match_table = spd5118_of_ids, + .pm = pm_sleep_ptr(&spd5118_pm_ops), + }, + .probe = spd5118_i3c_probe, + .id_table = spd5118_i3c_ids, +}; + +module_i3c_i2c_driver(spd5118_i3c_driver, &spd5118_i2c_driver) MODULE_AUTHOR("René Rebe "); MODULE_AUTHOR("Guenter Roeck "); diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 4eb31b913c1a7..44afef32d6569 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -30,47 +30,37 @@ #define BYTES_PER_FIFO_WORD 4 -#define I2C_CNFG 0x000 #define I2C_CNFG_DEBOUNCE_CNT GENMASK(14, 12) #define I2C_CNFG_PACKET_MODE_EN BIT(10) #define I2C_CNFG_NEW_MASTER_FSM BIT(11) #define I2C_CNFG_MULTI_MASTER_MODE BIT(17) -#define I2C_STATUS 0x01c -#define I2C_SL_CNFG 0x020 + #define I2C_SL_CNFG_NACK BIT(1) #define I2C_SL_CNFG_NEWSL BIT(2) -#define I2C_SL_ADDR1 0x02c -#define I2C_SL_ADDR2 0x030 -#define I2C_TLOW_SEXT 0x034 -#define I2C_TX_FIFO 0x050 -#define I2C_RX_FIFO 0x054 -#define I2C_PACKET_TRANSFER_STATUS 0x058 -#define I2C_FIFO_CONTROL 0x05c + #define I2C_FIFO_CONTROL_TX_FLUSH BIT(1) #define I2C_FIFO_CONTROL_RX_FLUSH BIT(0) #define I2C_FIFO_CONTROL_TX_TRIG(x) (((x) - 1) << 5) #define I2C_FIFO_CONTROL_RX_TRIG(x) (((x) - 1) << 2) -#define I2C_FIFO_STATUS 0x060 + #define I2C_FIFO_STATUS_TX GENMASK(7, 4) #define I2C_FIFO_STATUS_RX GENMASK(3, 0) -#define I2C_INT_MASK 0x064 -#define I2C_INT_STATUS 0x068 + #define I2C_INT_BUS_CLR_DONE BIT(11) #define I2C_INT_PACKET_XFER_COMPLETE BIT(7) #define I2C_INT_NO_ACK BIT(3) #define I2C_INT_ARBITRATION_LOST BIT(2) #define I2C_INT_TX_FIFO_DATA_REQ BIT(1) #define I2C_INT_RX_FIFO_DATA_REQ BIT(0) -#define I2C_CLK_DIVISOR 0x06c + 
#define I2C_CLK_DIVISOR_STD_FAST_MODE GENMASK(31, 16) #define I2C_CLK_DIVISOR_HSMODE GENMASK(15, 0) -#define DVC_CTRL_REG1 0x000 #define DVC_CTRL_REG1_INTR_EN BIT(10) -#define DVC_CTRL_REG3 0x008 + #define DVC_CTRL_REG3_SW_PROG BIT(26) #define DVC_CTRL_REG3_I2C_DONE_INTR_EN BIT(30) -#define DVC_STATUS 0x00c + #define DVC_STATUS_I2C_DONE_INTR BIT(30) #define I2C_ERR_NONE 0x00 @@ -85,6 +75,7 @@ #define PACKET_HEADER0_PROTOCOL GENMASK(7, 4) #define PACKET_HEADER0_PROTOCOL_I2C 1 +#define I2C_HEADER_HS_MODE BIT(22) #define I2C_HEADER_CONT_ON_NAK BIT(21) #define I2C_HEADER_READ BIT(19) #define I2C_HEADER_10BIT_ADDR BIT(18) @@ -93,48 +84,44 @@ #define I2C_HEADER_CONTINUE_XFER BIT(15) #define I2C_HEADER_SLAVE_ADDR_SHIFT 1 -#define I2C_BUS_CLEAR_CNFG 0x084 #define I2C_BC_SCLK_THRESHOLD GENMASK(23, 16) #define I2C_BC_STOP_COND BIT(2) #define I2C_BC_TERMINATE BIT(1) #define I2C_BC_ENABLE BIT(0) -#define I2C_BUS_CLEAR_STATUS 0x088 + #define I2C_BC_STATUS BIT(0) -#define I2C_CONFIG_LOAD 0x08c #define I2C_MSTR_CONFIG_LOAD BIT(0) -#define I2C_CLKEN_OVERRIDE 0x090 #define I2C_MST_CORE_CLKEN_OVR BIT(0) -#define I2C_INTERFACE_TIMING_0 0x094 -#define I2C_INTERFACE_TIMING_THIGH GENMASK(13, 8) -#define I2C_INTERFACE_TIMING_TLOW GENMASK(5, 0) -#define I2C_INTERFACE_TIMING_1 0x098 -#define I2C_INTERFACE_TIMING_TBUF GENMASK(29, 24) -#define I2C_INTERFACE_TIMING_TSU_STO GENMASK(21, 16) -#define I2C_INTERFACE_TIMING_THD_STA GENMASK(13, 8) -#define I2C_INTERFACE_TIMING_TSU_STA GENMASK(5, 0) - -#define I2C_HS_INTERFACE_TIMING_0 0x09c -#define I2C_HS_INTERFACE_TIMING_THIGH GENMASK(13, 8) -#define I2C_HS_INTERFACE_TIMING_TLOW GENMASK(5, 0) -#define I2C_HS_INTERFACE_TIMING_1 0x0a0 -#define I2C_HS_INTERFACE_TIMING_TSU_STO GENMASK(21, 16) -#define I2C_HS_INTERFACE_TIMING_THD_STA GENMASK(13, 8) -#define I2C_HS_INTERFACE_TIMING_TSU_STA GENMASK(5, 0) - -#define I2C_MST_FIFO_CONTROL 0x0b4 +#define I2C_INTERFACE_TIMING_THIGH GENMASK(13, 8) +#define I2C_INTERFACE_TIMING_TLOW GENMASK(5, 0) +#define I2C_INTERFACE_TIMING_TBUF GENMASK(29, 24) +#define I2C_INTERFACE_TIMING_TSU_STO GENMASK(21, 16) +#define I2C_INTERFACE_TIMING_THD_STA GENMASK(13, 8) +#define I2C_INTERFACE_TIMING_TSU_STA GENMASK(5, 0) + +#define I2C_HS_INTERFACE_TIMING_THIGH GENMASK(13, 8) +#define I2C_HS_INTERFACE_TIMING_TLOW GENMASK(5, 0) +#define I2C_HS_INTERFACE_TIMING_TSU_STO GENMASK(21, 16) +#define I2C_HS_INTERFACE_TIMING_THD_STA GENMASK(13, 8) +#define I2C_HS_INTERFACE_TIMING_TSU_STA GENMASK(5, 0) + #define I2C_MST_FIFO_CONTROL_RX_FLUSH BIT(0) #define I2C_MST_FIFO_CONTROL_TX_FLUSH BIT(1) #define I2C_MST_FIFO_CONTROL_RX_TRIG(x) (((x) - 1) << 4) #define I2C_MST_FIFO_CONTROL_TX_TRIG(x) (((x) - 1) << 16) -#define I2C_MST_FIFO_STATUS 0x0b8 #define I2C_MST_FIFO_STATUS_TX GENMASK(23, 16) #define I2C_MST_FIFO_STATUS_RX GENMASK(7, 0) -#define I2C_MASTER_RESET_CNTRL 0x0a8 +#define I2C_SW_MUTEX_REQUEST GENMASK(3, 0) +#define I2C_SW_MUTEX_GRANT GENMASK(7, 4) +#define I2C_SW_MUTEX_ID_CCPLEX 9 + +/* SW mutex acquire timeout value in microseconds. 
*/ +#define I2C_SW_MUTEX_TIMEOUT_US (25 * USEC_PER_MSEC) /* configuration load timeout in microseconds */ #define I2C_CONFIG_LOAD_TIMEOUT 1000000 @@ -150,6 +137,178 @@ */ #define I2C_PIO_MODE_PREFERRED_LEN 32 +struct tegra_i2c_regs { + unsigned int cnfg; + unsigned int status; + unsigned int sl_cnfg; + unsigned int sl_addr1; + unsigned int sl_addr2; + unsigned int tlow_sext; + unsigned int tx_fifo; + unsigned int rx_fifo; + unsigned int packet_transfer_status; + unsigned int fifo_control; + unsigned int fifo_status; + unsigned int int_mask; + unsigned int int_status; + unsigned int clk_divisor; + unsigned int bus_clear_cnfg; + unsigned int bus_clear_status; + unsigned int config_load; + unsigned int clken_override; + unsigned int interface_timing_0; + unsigned int interface_timing_1; + unsigned int hs_interface_timing_0; + unsigned int hs_interface_timing_1; + unsigned int master_reset_cntrl; + unsigned int mst_fifo_control; + unsigned int mst_fifo_status; + unsigned int sw_mutex; + unsigned int dvc_ctrl_reg1; + unsigned int dvc_ctrl_reg3; + unsigned int dvc_status; +}; + +static const struct tegra_i2c_regs tegra20_i2c_regs = { + .cnfg = 0x000, + .status = 0x01c, + .sl_cnfg = 0x020, + .sl_addr1 = 0x02c, + .sl_addr2 = 0x030, + .tx_fifo = 0x050, + .rx_fifo = 0x054, + .packet_transfer_status = 0x058, + .fifo_control = 0x05c, + .fifo_status = 0x060, + .int_mask = 0x064, + .int_status = 0x068, + .clk_divisor = 0x06c, + .bus_clear_cnfg = 0x084, + .bus_clear_status = 0x088, + .config_load = 0x08c, + .clken_override = 0x090, + .interface_timing_0 = 0x094, + .interface_timing_1 = 0x098, + .hs_interface_timing_0 = 0x09c, + .hs_interface_timing_1 = 0x0a0, + .master_reset_cntrl = 0x0a8, + .mst_fifo_control = 0x0b4, + .mst_fifo_status = 0x0b8, +}; + +#if IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) +static const struct tegra_i2c_regs tegra20_dvc_i2c_regs = { + .dvc_ctrl_reg1 = 0x000, + .dvc_ctrl_reg3 = 0x008, + .dvc_status = 0x00c, + .cnfg = 0x040, + .status = 0x05c, + .tx_fifo = 0x060, + .rx_fifo = 0x064, + .packet_transfer_status = 0x068, + .fifo_control = 0x06c, + .fifo_status = 0x070, + .int_mask = 0x074, + .int_status = 0x078, + .clk_divisor = 0x07c, + .bus_clear_cnfg = 0x0c4, + .bus_clear_status = 0x0c8, + .config_load = 0x0cc, + .clken_override = 0x0d0, + .interface_timing_0 = 0x0d4, + .interface_timing_1 = 0x0d8, + .hs_interface_timing_0 = 0x0dc, + .hs_interface_timing_1 = 0x0e0, + .master_reset_cntrl = 0x0e8, + .mst_fifo_control = 0x0c4, + .mst_fifo_status = 0x0c8, +}; +#endif + +#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +static const struct tegra_i2c_regs tegra210_vi_i2c_regs = { + .cnfg = 0x0c00, + .status = 0x0c70, + .tlow_sext = 0x0cd0, + .tx_fifo = 0x0d40, + .rx_fifo = 0x0d50, + .packet_transfer_status = 0x0d60, + .fifo_control = 0x0d70, + .fifo_status = 0x0d80, + .int_mask = 0x0d90, + .int_status = 0x0da0, + .clk_divisor = 0x0db0, + .bus_clear_cnfg = 0x0e10, + .bus_clear_status = 0x0e20, + .config_load = 0x0e30, + .clken_override = 0x0e40, + .interface_timing_0 = 0x0e50, + .interface_timing_1 = 0x0e60, + .hs_interface_timing_0 = 0x0e70, + .hs_interface_timing_1 = 0x0e80, + .master_reset_cntrl = 0x0ea0, + .mst_fifo_control = 0x0ed0, + .mst_fifo_status = 0x0ee0, +}; +#endif + +static const struct tegra_i2c_regs tegra264_i2c_regs = { + .cnfg = 0x000, + .status = 0x01c, + .sl_cnfg = 0x020, + .sl_addr1 = 0x02c, + .sl_addr2 = 0x030, + .tx_fifo = 0x050, + .rx_fifo = 0x054, + .packet_transfer_status = 0x058, + .fifo_control = 0x05c, + .fifo_status = 0x060, + .int_mask = 0x064, + .int_status = 0x068, + 
.clk_divisor = 0x06c, + .bus_clear_cnfg = 0x084, + .bus_clear_status = 0x088, + .config_load = 0x08c, + .clken_override = 0x090, + .interface_timing_0 = 0x094, + .interface_timing_1 = 0x098, + .hs_interface_timing_0 = 0x09c, + .hs_interface_timing_1 = 0x0a0, + .master_reset_cntrl = 0x0a8, + .mst_fifo_control = 0x0b4, + .mst_fifo_status = 0x0b8, + .sw_mutex = 0x0ec, +}; + +static const struct tegra_i2c_regs tegra410_i2c_regs = { + .cnfg = 0x000, + .status = 0x01c, + .sl_cnfg = 0x020, + .sl_addr1 = 0x02c, + .sl_addr2 = 0x030, + .tlow_sext = 0x034, + .tx_fifo = 0x054, + .rx_fifo = 0x058, + .packet_transfer_status = 0x05c, + .fifo_control = 0x060, + .fifo_status = 0x064, + .int_mask = 0x068, + .int_status = 0x06c, + .clk_divisor = 0x070, + .bus_clear_cnfg = 0x088, + .bus_clear_status = 0x08c, + .config_load = 0x090, + .clken_override = 0x094, + .interface_timing_0 = 0x098, + .interface_timing_1 = 0x09c, + .hs_interface_timing_0 = 0x0a0, + .hs_interface_timing_1 = 0x0a4, + .master_reset_cntrl = 0x0ac, + .mst_fifo_control = 0x0b8, + .mst_fifo_status = 0x0bc, + .sw_mutex = 0x0f0, +}; + /* * msg_end_type: The bus control which needs to be sent at end of transfer. * @MSG_END_STOP: Send stop pulse. @@ -162,6 +321,18 @@ enum msg_end_type { MSG_END_CONTINUE, }; +/* + * tegra_i2c_variant: Identifies the variant of I2C controller. + * @TEGRA_I2C_VARIANT_DEFAULT: Identifies the default I2C controller. + * @TEGRA_I2C_VARIANT_DVC: Identifies the DVC I2C controller, has a different register layout. + * @TEGRA_I2C_VARIANT_VI: Identifies the VI I2C controller, has a different register layout. + */ +enum tegra_i2c_variant { + TEGRA_I2C_VARIANT_DEFAULT, + TEGRA_I2C_VARIANT_DVC, + TEGRA_I2C_VARIANT_VI, +}; + /** * struct tegra_i2c_hw_feature : per hardware generation features * @has_continue_xfer_support: continue-transfer supported @@ -196,16 +367,26 @@ enum msg_end_type { * @has_apb_dma: Support of APBDMA on corresponding Tegra chip. * @tlow_std_mode: Low period of the clock in standard mode. * @thigh_std_mode: High period of the clock in standard mode. - * @tlow_fast_fastplus_mode: Low period of the clock in fast/fast-plus modes. - * @thigh_fast_fastplus_mode: High period of the clock in fast/fast-plus modes. + * @tlow_fast_mode: Low period of the clock in fast mode. + * @thigh_fast_mode: High period of the clock in fast mode. + * @tlow_fastplus_mode: Low period of the clock in fast-plus mode. + * @thigh_fastplus_mode: High period of the clock in fast-plus mode. + * @tlow_hs_mode: Low period of the clock in HS mode. + * @thigh_hs_mode: High period of the clock in HS mode. * @setup_hold_time_std_mode: Setup and hold time for start and stop conditions * in standard mode. - * @setup_hold_time_fast_fast_plus_mode: Setup and hold time for start and stop - * conditions in fast/fast-plus modes. + * @setup_hold_time_fast_mode: Setup and hold time for start and stop + * conditions in fast mode. + * @setup_hold_time_fastplus_mode: Setup and hold time for start and stop + * conditions in fast-plus mode. * @setup_hold_time_hs_mode: Setup and hold time for start and stop conditions * in HS mode. * @has_interface_timing_reg: Has interface timing register to program the tuned * timing settings. + * @enable_hs_mode_support: Enable support for high speed (HS) mode transfers. + * @has_mutex: Has mutex register for mutual exclusion with other firmwares or VMs. + * @variant: This represents the I2C controller variant. + * @regs: Register offsets for the specific SoC variant. 
*/ struct tegra_i2c_hw_feature { bool has_continue_xfer_support; @@ -224,12 +405,21 @@ struct tegra_i2c_hw_feature { bool has_apb_dma; u32 tlow_std_mode; u32 thigh_std_mode; - u32 tlow_fast_fastplus_mode; - u32 thigh_fast_fastplus_mode; + u32 tlow_fast_mode; + u32 thigh_fast_mode; + u32 tlow_fastplus_mode; + u32 thigh_fastplus_mode; + u32 tlow_hs_mode; + u32 thigh_hs_mode; u32 setup_hold_time_std_mode; - u32 setup_hold_time_fast_fast_plus_mode; + u32 setup_hold_time_fast_mode; + u32 setup_hold_time_fastplus_mode; u32 setup_hold_time_hs_mode; bool has_interface_timing_reg; + bool enable_hs_mode_support; + bool has_mutex; + enum tegra_i2c_variant variant; + const struct tegra_i2c_regs *regs; }; /** @@ -245,8 +435,6 @@ struct tegra_i2c_hw_feature { * @base_phys: physical base address of the I2C controller * @cont_id: I2C controller ID, used for packet header * @irq: IRQ number of transfer complete interrupt - * @is_dvc: identifies the DVC I2C controller, has a different register layout - * @is_vi: identifies the VI I2C controller, has a different register layout * @msg_complete: transfer completion notifier * @msg_buf_remaining: size of unsent data in the message buffer * @msg_len: length of message in current transfer @@ -299,58 +487,33 @@ struct tegra_i2c_dev { bool atomic_mode; bool dma_mode; bool msg_read; - bool is_dvc; - bool is_vi; }; -#define IS_DVC(dev) (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) && (dev)->is_dvc) -#define IS_VI(dev) (IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) && (dev)->is_vi) - -static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, - unsigned int reg) -{ - writel_relaxed(val, i2c_dev->base + reg); -} - -static u32 dvc_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) -{ - return readl_relaxed(i2c_dev->base + reg); -} - -/* - * If necessary, i2c_writel() and i2c_readl() will offset the register - * in order to talk to the I2C block inside the DVC block. - */ -static u32 tegra_i2c_reg_addr(struct tegra_i2c_dev *i2c_dev, unsigned int reg) -{ - if (IS_DVC(i2c_dev)) - reg += (reg >= I2C_TX_FIFO) ? 
0x10 : 0x40; - else if (IS_VI(i2c_dev)) - reg = 0xc00 + (reg << 2); - - return reg; -} +#define IS_DVC(dev) (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) && \ + (dev)->hw->variant == TEGRA_I2C_VARIANT_DVC) +#define IS_VI(dev) (IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) && \ + (dev)->hw->variant == TEGRA_I2C_VARIANT_VI) static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) { - writel_relaxed(val, i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + writel_relaxed(val, i2c_dev->base + reg); /* read back register to make sure that register writes completed */ - if (reg != I2C_TX_FIFO) - readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + if (!IS_DVC(i2c_dev) && reg != i2c_dev->hw->regs->tx_fifo) + readl_relaxed(i2c_dev->base + reg); else if (IS_VI(i2c_dev)) - readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, I2C_INT_STATUS)); + readl_relaxed(i2c_dev->base + i2c_dev->hw->regs->int_status); } static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) { - return readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + return readl_relaxed(i2c_dev->base + reg); } static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, unsigned int reg, unsigned int len) { - writesl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); + writesl(i2c_dev->base + reg, data, len); } static void i2c_writesl_vi(struct tegra_i2c_dev *i2c_dev, void *data, @@ -371,23 +534,93 @@ static void i2c_writesl_vi(struct tegra_i2c_dev *i2c_dev, void *data, static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, unsigned int reg, unsigned int len) { - readsl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); + readsl(i2c_dev->base + reg, data, len); +} + +static bool tegra_i2c_mutex_acquired(struct tegra_i2c_dev *i2c_dev) +{ + unsigned int reg = i2c_dev->hw->regs->sw_mutex; + u32 val, id; + + val = readl(i2c_dev->base + reg); + id = FIELD_GET(I2C_SW_MUTEX_GRANT, val); + + return id == I2C_SW_MUTEX_ID_CCPLEX; +} + +static bool tegra_i2c_mutex_trylock(struct tegra_i2c_dev *i2c_dev) +{ + unsigned int reg = i2c_dev->hw->regs->sw_mutex; + u32 val, id; + + val = readl(i2c_dev->base + reg); + id = FIELD_GET(I2C_SW_MUTEX_GRANT, val); + if (id != 0 && id != I2C_SW_MUTEX_ID_CCPLEX) + return false; + + val = FIELD_PREP(I2C_SW_MUTEX_REQUEST, I2C_SW_MUTEX_ID_CCPLEX); + writel(val, i2c_dev->base + reg); + + return tegra_i2c_mutex_acquired(i2c_dev); +} + +static int tegra_i2c_mutex_lock(struct tegra_i2c_dev *i2c_dev) +{ + bool locked; + int ret; + + if (!i2c_dev->hw->has_mutex) + return 0; + + if (i2c_dev->atomic_mode) + ret = read_poll_timeout_atomic(tegra_i2c_mutex_trylock, locked, locked, + USEC_PER_MSEC, I2C_SW_MUTEX_TIMEOUT_US, + false, i2c_dev); + else + ret = read_poll_timeout(tegra_i2c_mutex_trylock, locked, locked, USEC_PER_MSEC, + I2C_SW_MUTEX_TIMEOUT_US, false, i2c_dev); + + if (ret) + dev_warn(i2c_dev->dev, "failed to acquire mutex\n"); + + return ret; +} + +static int tegra_i2c_mutex_unlock(struct tegra_i2c_dev *i2c_dev) +{ + unsigned int reg = i2c_dev->hw->regs->sw_mutex; + u32 val, id; + + if (!i2c_dev->hw->has_mutex) + return 0; + + val = readl(i2c_dev->base + reg); + + id = FIELD_GET(I2C_SW_MUTEX_GRANT, val); + if (id && id != I2C_SW_MUTEX_ID_CCPLEX) { + dev_warn(i2c_dev->dev, "unable to unlock mutex, mutex is owned by: %u\n", id); + return -EPERM; + } + + writel(0, i2c_dev->base + reg); + + return 0; } static void tegra_i2c_mask_irq(struct tegra_i2c_dev *i2c_dev, u32 mask) { u32 int_mask; - int_mask = i2c_readl(i2c_dev, I2C_INT_MASK) 
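The SW_MUTEX handling above is a plain request/grant handshake: the CPU writes its owner ID into the REQUEST field and only treats the mutex as acquired once the GRANT field reads back the same ID, retrying via read_poll_timeout() (or its atomic variant) until I2C_SW_MUTEX_TIMEOUT_US expires. A rough stand-alone model of that handshake is sketched below; the field layout and the CCPLEX owner ID are placeholders, not the driver's real I2C_SW_MUTEX_* values.

#include <stdint.h>
#include <stdio.h>

/* Placeholder layout: REQUEST in bits [3:0], GRANT in bits [7:4]. */
#define SW_MUTEX_REQUEST_SHIFT	0
#define SW_MUTEX_REQUEST_MASK	(0xfu << SW_MUTEX_REQUEST_SHIFT)
#define SW_MUTEX_GRANT_SHIFT	4
#define SW_MUTEX_GRANT_MASK	(0xfu << SW_MUTEX_GRANT_SHIFT)
#define SW_MUTEX_ID_CCPLEX	0x5u	/* placeholder owner ID */

static uint32_t sw_mutex;	/* stands in for the SW_MUTEX register */

/* Firmware/hardware side: grant the mutex to whichever ID is requesting. */
static void hw_arbitrate(void)
{
	uint32_t req = (sw_mutex & SW_MUTEX_REQUEST_MASK) >> SW_MUTEX_REQUEST_SHIFT;

	sw_mutex = (sw_mutex & ~SW_MUTEX_GRANT_MASK) | (req << SW_MUTEX_GRANT_SHIFT);
}

/* CPU side: same shape as tegra_i2c_mutex_trylock() above. */
static int sw_mutex_trylock(void)
{
	uint32_t grant = (sw_mutex & SW_MUTEX_GRANT_MASK) >> SW_MUTEX_GRANT_SHIFT;

	if (grant != 0 && grant != SW_MUTEX_ID_CCPLEX)
		return 0;	/* currently owned by another agent */

	sw_mutex = (sw_mutex & ~SW_MUTEX_REQUEST_MASK) |
		   (SW_MUTEX_ID_CCPLEX << SW_MUTEX_REQUEST_SHIFT);
	hw_arbitrate();		/* the other side resolves the request */

	grant = (sw_mutex & SW_MUTEX_GRANT_MASK) >> SW_MUTEX_GRANT_SHIFT;
	return grant == SW_MUTEX_ID_CCPLEX;	/* success only if granted to us */
}

int main(void)
{
	printf("trylock: %s\n", sw_mutex_trylock() ? "acquired" : "busy");
	sw_mutex = 0;	/* release by clearing the register, as the unlock path does */
	return 0;
}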
& ~mask; - i2c_writel(i2c_dev, int_mask, I2C_INT_MASK); + int_mask = i2c_readl(i2c_dev, i2c_dev->hw->regs->int_mask) & ~mask; + i2c_writel(i2c_dev, int_mask, i2c_dev->hw->regs->int_mask); } static void tegra_i2c_unmask_irq(struct tegra_i2c_dev *i2c_dev, u32 mask) { u32 int_mask; - int_mask = i2c_readl(i2c_dev, I2C_INT_MASK) | mask; - i2c_writel(i2c_dev, int_mask, I2C_INT_MASK); + int_mask = i2c_readl(i2c_dev, i2c_dev->hw->regs->int_mask) | mask; + i2c_writel(i2c_dev, int_mask, i2c_dev->hw->regs->int_mask); } static void tegra_i2c_dma_complete(void *args) @@ -449,6 +682,11 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) if (IS_VI(i2c_dev)) return 0; + if (!of_property_present(i2c_dev->dev->of_node, "dmas")) { + dev_dbg(i2c_dev->dev, "DMA not available, falling back to PIO\n"); + return 0; + } + if (i2c_dev->hw->has_apb_dma) { if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { dev_dbg(i2c_dev->dev, "APB DMA support not enabled\n"); @@ -510,14 +748,14 @@ static void tegra_dvc_init(struct tegra_i2c_dev *i2c_dev) { u32 val; - val = dvc_readl(i2c_dev, DVC_CTRL_REG3); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->dvc_ctrl_reg3); val |= DVC_CTRL_REG3_SW_PROG; val |= DVC_CTRL_REG3_I2C_DONE_INTR_EN; - dvc_writel(i2c_dev, val, DVC_CTRL_REG3); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->dvc_ctrl_reg3); - val = dvc_readl(i2c_dev, DVC_CTRL_REG1); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->dvc_ctrl_reg1); val |= DVC_CTRL_REG1_INTR_EN; - dvc_writel(i2c_dev, val, DVC_CTRL_REG1); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->dvc_ctrl_reg1); } static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) @@ -526,34 +764,34 @@ static void tegra_i2c_vi_init(struct tegra_i2c_dev *i2c_dev) value = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, 2) | FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, 4); - i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_0); + i2c_writel(i2c_dev, value, i2c_dev->hw->regs->interface_timing_0); value = FIELD_PREP(I2C_INTERFACE_TIMING_TBUF, 4) | FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STO, 7) | FIELD_PREP(I2C_INTERFACE_TIMING_THD_STA, 4) | FIELD_PREP(I2C_INTERFACE_TIMING_TSU_STA, 4); - i2c_writel(i2c_dev, value, I2C_INTERFACE_TIMING_1); + i2c_writel(i2c_dev, value, i2c_dev->hw->regs->interface_timing_1); value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_THIGH, 3) | FIELD_PREP(I2C_HS_INTERFACE_TIMING_TLOW, 8); - i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_0); + i2c_writel(i2c_dev, value, i2c_dev->hw->regs->hs_interface_timing_0); value = FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STO, 11) | FIELD_PREP(I2C_HS_INTERFACE_TIMING_THD_STA, 11) | FIELD_PREP(I2C_HS_INTERFACE_TIMING_TSU_STA, 11); - i2c_writel(i2c_dev, value, I2C_HS_INTERFACE_TIMING_1); + i2c_writel(i2c_dev, value, i2c_dev->hw->regs->hs_interface_timing_1); value = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND; - i2c_writel(i2c_dev, value, I2C_BUS_CLEAR_CNFG); + i2c_writel(i2c_dev, value, i2c_dev->hw->regs->bus_clear_cnfg); - i2c_writel(i2c_dev, 0x0, I2C_TLOW_SEXT); + i2c_writel(i2c_dev, 0x0, i2c_dev->hw->regs->tlow_sext); } static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, u32 reg, u32 mask, u32 delay_us, u32 timeout_us) { - void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); + void __iomem *addr = i2c_dev->base + reg; u32 val; if (!i2c_dev->atomic_mode) @@ -572,11 +810,11 @@ static int tegra_i2c_flush_fifos(struct tegra_i2c_dev *i2c_dev) if (i2c_dev->hw->has_mst_fifo) { mask = I2C_MST_FIFO_CONTROL_TX_FLUSH | I2C_MST_FIFO_CONTROL_RX_FLUSH; - offset = I2C_MST_FIFO_CONTROL; + offset = 
i2c_dev->hw->regs->mst_fifo_control; } else { mask = I2C_FIFO_CONTROL_TX_FLUSH | I2C_FIFO_CONTROL_RX_FLUSH; - offset = I2C_FIFO_CONTROL; + offset = i2c_dev->hw->regs->fifo_control; } val = i2c_readl(i2c_dev, offset); @@ -599,9 +837,9 @@ static int tegra_i2c_wait_for_config_load(struct tegra_i2c_dev *i2c_dev) if (!i2c_dev->hw->has_config_load_reg) return 0; - i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, I2C_CONFIG_LOAD); + i2c_writel(i2c_dev, I2C_MSTR_CONFIG_LOAD, i2c_dev->hw->regs->config_load); - err = tegra_i2c_poll_register(i2c_dev, I2C_CONFIG_LOAD, 0xffffffff, + err = tegra_i2c_poll_register(i2c_dev, i2c_dev->hw->regs->config_load, 0xffffffff, 1000, I2C_CONFIG_LOAD_TIMEOUT); if (err) { dev_err(i2c_dev->dev, "failed to load config\n"); @@ -622,10 +860,10 @@ static int tegra_i2c_master_reset(struct tegra_i2c_dev *i2c_dev) * SW needs to wait for 2us after assertion and de-assertion of this soft * reset. */ - i2c_writel(i2c_dev, 0x1, I2C_MASTER_RESET_CNTRL); + i2c_writel(i2c_dev, 0x1, i2c_dev->hw->regs->master_reset_cntrl); fsleep(2); - i2c_writel(i2c_dev, 0x0, I2C_MASTER_RESET_CNTRL); + i2c_writel(i2c_dev, 0x0, i2c_dev->hw->regs->master_reset_cntrl); fsleep(2); return 0; @@ -634,6 +872,7 @@ static int tegra_i2c_master_reset(struct tegra_i2c_dev *i2c_dev) static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) { u32 val, clk_divisor, clk_multiplier, tsu_thd, tlow, thigh, non_hs_mode; + u32 max_bus_freq_hz; struct i2c_timings *t = &i2c_dev->timings; int err; @@ -666,43 +905,58 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) if (i2c_dev->hw->has_multi_master_mode) val |= I2C_CNFG_MULTI_MASTER_MODE; - i2c_writel(i2c_dev, val, I2C_CNFG); - i2c_writel(i2c_dev, 0, I2C_INT_MASK); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->cnfg); + i2c_writel(i2c_dev, 0, i2c_dev->hw->regs->int_mask); if (IS_VI(i2c_dev)) tegra_i2c_vi_init(i2c_dev); - switch (t->bus_freq_hz) { - case I2C_MAX_STANDARD_MODE_FREQ + 1 ... I2C_MAX_FAST_MODE_PLUS_FREQ: - default: - tlow = i2c_dev->hw->tlow_fast_fastplus_mode; - thigh = i2c_dev->hw->thigh_fast_fastplus_mode; - tsu_thd = i2c_dev->hw->setup_hold_time_fast_fast_plus_mode; + if (i2c_dev->hw->enable_hs_mode_support) + max_bus_freq_hz = I2C_MAX_HIGH_SPEED_MODE_FREQ; + else + max_bus_freq_hz = I2C_MAX_FAST_MODE_PLUS_FREQ; - if (t->bus_freq_hz > I2C_MAX_FAST_MODE_FREQ) - non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; - else - non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; - break; + if (WARN_ON(t->bus_freq_hz > max_bus_freq_hz)) + t->bus_freq_hz = max_bus_freq_hz; - case 0 ... I2C_MAX_STANDARD_MODE_FREQ: + if (t->bus_freq_hz <= I2C_MAX_STANDARD_MODE_FREQ) { tlow = i2c_dev->hw->tlow_std_mode; thigh = i2c_dev->hw->thigh_std_mode; tsu_thd = i2c_dev->hw->setup_hold_time_std_mode; non_hs_mode = i2c_dev->hw->clk_divisor_std_mode; - break; + } else if (t->bus_freq_hz <= I2C_MAX_FAST_MODE_FREQ) { + tlow = i2c_dev->hw->tlow_fast_mode; + thigh = i2c_dev->hw->thigh_fast_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_fast_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; + } else if (t->bus_freq_hz <= I2C_MAX_FAST_MODE_PLUS_FREQ) { + tlow = i2c_dev->hw->tlow_fastplus_mode; + thigh = i2c_dev->hw->thigh_fastplus_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_fastplus_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_fast_plus_mode; + } else { + /* + * When using HS mode, i.e. when the bus frequency is greater than fast plus mode, + * the non-hs timing registers will be used for sending the master code byte for + * transition to HS mode. 
Configure the non-hs timing registers for Fast Mode to + * send the master code byte at 400kHz. + */ + tlow = i2c_dev->hw->tlow_fast_mode; + thigh = i2c_dev->hw->thigh_fast_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_fast_mode; + non_hs_mode = i2c_dev->hw->clk_divisor_fast_mode; } /* make sure clock divisor programmed correctly */ clk_divisor = FIELD_PREP(I2C_CLK_DIVISOR_HSMODE, i2c_dev->hw->clk_divisor_hs_mode) | FIELD_PREP(I2C_CLK_DIVISOR_STD_FAST_MODE, non_hs_mode); - i2c_writel(i2c_dev, clk_divisor, I2C_CLK_DIVISOR); + i2c_writel(i2c_dev, clk_divisor, i2c_dev->hw->regs->clk_divisor); if (i2c_dev->hw->has_interface_timing_reg) { val = FIELD_PREP(I2C_INTERFACE_TIMING_THIGH, thigh) | FIELD_PREP(I2C_INTERFACE_TIMING_TLOW, tlow); - i2c_writel(i2c_dev, val, I2C_INTERFACE_TIMING_0); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->interface_timing_0); } /* @@ -710,7 +964,19 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) * Otherwise, preserve the chip default values. */ if (i2c_dev->hw->has_interface_timing_reg && tsu_thd) - i2c_writel(i2c_dev, tsu_thd, I2C_INTERFACE_TIMING_1); + i2c_writel(i2c_dev, tsu_thd, i2c_dev->hw->regs->interface_timing_1); + + /* Write HS mode registers. These will get used only for HS mode*/ + if (i2c_dev->hw->enable_hs_mode_support) { + tlow = i2c_dev->hw->tlow_hs_mode; + thigh = i2c_dev->hw->thigh_hs_mode; + tsu_thd = i2c_dev->hw->setup_hold_time_hs_mode; + + val = FIELD_PREP(I2C_HS_INTERFACE_TIMING_THIGH, thigh) | + FIELD_PREP(I2C_HS_INTERFACE_TIMING_TLOW, tlow); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->hs_interface_timing_0); + i2c_writel(i2c_dev, tsu_thd, i2c_dev->hw->regs->hs_interface_timing_1); + } clk_multiplier = (tlow + thigh + 2) * (non_hs_mode + 1); @@ -722,12 +988,12 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) } if (!IS_DVC(i2c_dev) && !IS_VI(i2c_dev)) { - u32 sl_cfg = i2c_readl(i2c_dev, I2C_SL_CNFG); + u32 sl_cfg = i2c_readl(i2c_dev, i2c_dev->hw->regs->sl_cnfg); sl_cfg |= I2C_SL_CNFG_NACK | I2C_SL_CNFG_NEWSL; - i2c_writel(i2c_dev, sl_cfg, I2C_SL_CNFG); - i2c_writel(i2c_dev, 0xfc, I2C_SL_ADDR1); - i2c_writel(i2c_dev, 0x00, I2C_SL_ADDR2); + i2c_writel(i2c_dev, sl_cfg, i2c_dev->hw->regs->sl_cnfg); + i2c_writel(i2c_dev, 0xfc, i2c_dev->hw->regs->sl_addr1); + i2c_writel(i2c_dev, 0x00, i2c_dev->hw->regs->sl_addr2); } err = tegra_i2c_flush_fifos(i2c_dev); @@ -735,7 +1001,7 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) return err; if (i2c_dev->multimaster_mode && i2c_dev->hw->has_slcg_override_reg) - i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, I2C_CLKEN_OVERRIDE); + i2c_writel(i2c_dev, I2C_MST_CORE_CLKEN_OVR, i2c_dev->hw->regs->clken_override); err = tegra_i2c_wait_for_config_load(i2c_dev); if (err) @@ -756,9 +1022,9 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) */ udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->timings.bus_freq_hz)); - cnfg = i2c_readl(i2c_dev, I2C_CNFG); + cnfg = i2c_readl(i2c_dev, i2c_dev->hw->regs->cnfg); if (cnfg & I2C_CNFG_PACKET_MODE_EN) - i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG); + i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, i2c_dev->hw->regs->cnfg); return tegra_i2c_wait_for_config_load(i2c_dev); } @@ -778,10 +1044,10 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) return -EINVAL; if (i2c_dev->hw->has_mst_fifo) { - val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->mst_fifo_status); rx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_RX, val); } else { - val = i2c_readl(i2c_dev, 
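As a sanity check of the clk_multiplier expression above: with the Tegra264 standard-mode values introduced later in this patch (tlow = 0x8, thigh = 0x7, clk_divisor_std_mode = 0x1d), the multiplier works out to (8 + 7 + 2) * (0x1d + 1) = 17 * 30 = 510, i.e. the divided controller clock has to run at 510 times the bus rate (51 MHz for a 100 kHz bus). The snippet below is only that arithmetic, not driver code.

#include <stdio.h>

/* Tegra264 standard-mode values taken from tegra264_i2c_hw in this patch. */
#define TLOW		0x8
#define THIGH		0x7
#define NON_HS_DIV	0x1d	/* clk_divisor_std_mode */

int main(void)
{
	unsigned int clk_multiplier = (TLOW + THIGH + 2) * (NON_HS_DIV + 1);
	unsigned int bus_freq_hz = 100000;	/* standard mode */

	printf("clk_multiplier = %u\n", clk_multiplier);		/* 510 */
	printf("divided clock  = %u Hz\n", clk_multiplier * bus_freq_hz);	/* 51000000 */
	return 0;
}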
I2C_FIFO_STATUS); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->fifo_status); rx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_RX, val); } @@ -790,7 +1056,7 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) if (words_to_transfer > rx_fifo_avail) words_to_transfer = rx_fifo_avail; - i2c_readsl(i2c_dev, buf, I2C_RX_FIFO, words_to_transfer); + i2c_readsl(i2c_dev, buf, i2c_dev->hw->regs->rx_fifo, words_to_transfer); buf += words_to_transfer * BYTES_PER_FIFO_WORD; buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD; @@ -806,7 +1072,7 @@ static int tegra_i2c_empty_rx_fifo(struct tegra_i2c_dev *i2c_dev) * when (words_to_transfer was > rx_fifo_avail) earlier * in this function. */ - val = i2c_readl(i2c_dev, I2C_RX_FIFO); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->rx_fifo); val = cpu_to_le32(val); memcpy(buf, &val, buf_remaining); buf_remaining = 0; @@ -831,10 +1097,10 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) u32 val; if (i2c_dev->hw->has_mst_fifo) { - val = i2c_readl(i2c_dev, I2C_MST_FIFO_STATUS); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->mst_fifo_status); tx_fifo_avail = FIELD_GET(I2C_MST_FIFO_STATUS_TX, val); } else { - val = i2c_readl(i2c_dev, I2C_FIFO_STATUS); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->fifo_status); tx_fifo_avail = FIELD_GET(I2C_FIFO_STATUS_TX, val); } @@ -865,9 +1131,9 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; if (IS_VI(i2c_dev)) - i2c_writesl_vi(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + i2c_writesl_vi(i2c_dev, buf, i2c_dev->hw->regs->tx_fifo, words_to_transfer); else - i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + i2c_writesl(i2c_dev, buf, i2c_dev->hw->regs->tx_fifo, words_to_transfer); buf += words_to_transfer * BYTES_PER_FIFO_WORD; } @@ -889,7 +1155,7 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) i2c_dev->msg_buf_remaining = 0; i2c_dev->msg_buf = NULL; - i2c_writel(i2c_dev, val, I2C_TX_FIFO); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->tx_fifo); } return 0; @@ -901,13 +1167,13 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) struct tegra_i2c_dev *i2c_dev = dev_id; u32 status; - status = i2c_readl(i2c_dev, I2C_INT_STATUS); + status = i2c_readl(i2c_dev, i2c_dev->hw->regs->int_status); if (status == 0) { dev_warn(i2c_dev->dev, "IRQ status 0 %08x %08x %08x\n", - i2c_readl(i2c_dev, I2C_PACKET_TRANSFER_STATUS), - i2c_readl(i2c_dev, I2C_STATUS), - i2c_readl(i2c_dev, I2C_CNFG)); + i2c_readl(i2c_dev, i2c_dev->hw->regs->packet_transfer_status), + i2c_readl(i2c_dev, i2c_dev->hw->regs->status), + i2c_readl(i2c_dev, i2c_dev->hw->regs->cnfg)); i2c_dev->msg_err |= I2C_ERR_UNKNOWN_INTERRUPT; goto err; } @@ -950,9 +1216,9 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) } } - i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_writel(i2c_dev, status, i2c_dev->hw->regs->int_status); if (IS_DVC(i2c_dev)) - dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + i2c_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, i2c_dev->hw->regs->dvc_status); /* * During message read XFER_COMPLETE interrupt is triggered prior to @@ -988,10 +1254,10 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id) if (i2c_dev->hw->supports_bus_clear) tegra_i2c_mask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); - i2c_writel(i2c_dev, status, I2C_INT_STATUS); + i2c_writel(i2c_dev, status, i2c_dev->hw->regs->int_status); if (IS_DVC(i2c_dev)) - dvc_writel(i2c_dev, DVC_STATUS_I2C_DONE_INTR, DVC_STATUS); + i2c_writel(i2c_dev, 
DVC_STATUS_I2C_DONE_INTR, i2c_dev->hw->regs->dvc_status); if (i2c_dev->dma_mode) { dmaengine_terminate_async(i2c_dev->dma_chan); @@ -1011,9 +1277,9 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, int err; if (i2c_dev->hw->has_mst_fifo) - reg = I2C_MST_FIFO_CONTROL; + reg = i2c_dev->hw->regs->mst_fifo_control; else - reg = I2C_FIFO_CONTROL; + reg = i2c_dev->hw->regs->fifo_control; if (i2c_dev->dma_mode) { if (len & 0xF) @@ -1024,7 +1290,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, dma_burst = 8; if (i2c_dev->msg_read) { - reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_RX_FIFO); + reg_offset = i2c_dev->hw->regs->rx_fifo; slv_config.src_addr = i2c_dev->base_phys + reg_offset; slv_config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -1035,7 +1301,7 @@ static void tegra_i2c_config_fifo_trig(struct tegra_i2c_dev *i2c_dev, else val = I2C_FIFO_CONTROL_RX_TRIG(dma_burst); } else { - reg_offset = tegra_i2c_reg_addr(i2c_dev, I2C_TX_FIFO); + reg_offset = i2c_dev->hw->regs->tx_fifo; slv_config.dst_addr = i2c_dev->base_phys + reg_offset; slv_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -1078,7 +1344,7 @@ static unsigned long tegra_i2c_poll_completion(struct tegra_i2c_dev *i2c_dev, ktime_t ktimeout = ktime_add_ms(ktime, timeout_ms); do { - u32 status = i2c_readl(i2c_dev, I2C_INT_STATUS); + u32 status = i2c_readl(i2c_dev, i2c_dev->hw->regs->int_status); if (status) tegra_i2c_isr(i2c_dev->irq, i2c_dev); @@ -1137,14 +1403,14 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) val = FIELD_PREP(I2C_BC_SCLK_THRESHOLD, 9) | I2C_BC_STOP_COND | I2C_BC_TERMINATE; - i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->bus_clear_cnfg); err = tegra_i2c_wait_for_config_load(i2c_dev); if (err) return err; val |= I2C_BC_ENABLE; - i2c_writel(i2c_dev, val, I2C_BUS_CLEAR_CNFG); + i2c_writel(i2c_dev, val, i2c_dev->hw->regs->bus_clear_cnfg); tegra_i2c_unmask_irq(i2c_dev, I2C_INT_BUS_CLR_DONE); time_left = tegra_i2c_wait_completion(i2c_dev, &i2c_dev->msg_complete, 50); @@ -1155,7 +1421,7 @@ static int tegra_i2c_issue_bus_clear(struct i2c_adapter *adap) return -ETIMEDOUT; } - val = i2c_readl(i2c_dev, I2C_BUS_CLEAR_STATUS); + val = i2c_readl(i2c_dev, i2c_dev->hw->regs->bus_clear_status); if (!(val & I2C_BC_STATUS)) { dev_err(i2c_dev->dev, "un-recovered arbitration lost\n"); return -EIO; @@ -1180,14 +1446,14 @@ static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode && !i2c_dev->msg_read) *dma_buf++ = packet_header; else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + i2c_writel(i2c_dev, packet_header, i2c_dev->hw->regs->tx_fifo); packet_header = i2c_dev->msg_len - 1; if (i2c_dev->dma_mode && !i2c_dev->msg_read) *dma_buf++ = packet_header; else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + i2c_writel(i2c_dev, packet_header, i2c_dev->hw->regs->tx_fifo); packet_header = I2C_HEADER_IE_ENABLE; @@ -1209,10 +1475,13 @@ static void tegra_i2c_push_packet_header(struct tegra_i2c_dev *i2c_dev, if (msg->flags & I2C_M_RD) packet_header |= I2C_HEADER_READ; + if (i2c_dev->timings.bus_freq_hz > I2C_MAX_FAST_MODE_PLUS_FREQ) + packet_header |= I2C_HEADER_HS_MODE; + if (i2c_dev->dma_mode && !i2c_dev->msg_read) *dma_buf++ = packet_header; else - i2c_writel(i2c_dev, packet_header, I2C_TX_FIFO); + i2c_writel(i2c_dev, packet_header, i2c_dev->hw->regs->tx_fifo); } static int tegra_i2c_error_recover(struct tegra_i2c_dev *i2c_dev, @@ -1333,7 +1602,7 @@ static int tegra_i2c_xfer_msg(struct 
tegra_i2c_dev *i2c_dev, tegra_i2c_unmask_irq(i2c_dev, int_mask); dev_dbg(i2c_dev->dev, "unmasked IRQ: %02x\n", - i2c_readl(i2c_dev, I2C_INT_MASK)); + i2c_readl(i2c_dev, i2c_dev->hw->regs->int_mask)); if (i2c_dev->dma_mode) { time_left = tegra_i2c_wait_completion(i2c_dev, @@ -1393,6 +1662,10 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], return ret; } + ret = tegra_i2c_mutex_lock(i2c_dev); + if (ret) + return ret; + for (i = 0; i < num; i++) { enum msg_end_type end_type = MSG_END_STOP; @@ -1422,6 +1695,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], break; } + ret = tegra_i2c_mutex_unlock(i2c_dev); pm_runtime_put(i2c_dev->dev); return ret ?: i; @@ -1491,14 +1765,55 @@ static const struct tegra_i2c_hw_feature tegra20_i2c_hw = { .has_apb_dma = true, .tlow_std_mode = 0x4, .thigh_std_mode = 0x2, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0x0, - .setup_hold_time_fast_fast_plus_mode = 0x0, + .setup_hold_time_fast_mode = 0x0, + .setup_hold_time_fastplus_mode = 0x0, .setup_hold_time_hs_mode = 0x0, .has_interface_timing_reg = false, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, }; +#if IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) +static const struct tegra_i2c_hw_feature tegra20_dvc_i2c_hw = { + .has_continue_xfer_support = false, + .has_per_pkt_xfer_complete_irq = false, + .clk_divisor_hs_mode = 3, + .clk_divisor_std_mode = 0, + .clk_divisor_fast_mode = 0, + .clk_divisor_fast_plus_mode = 0, + .has_config_load_reg = false, + .has_multi_master_mode = false, + .has_slcg_override_reg = false, + .has_mst_fifo = false, + .has_mst_reset = false, + .quirks = &tegra_i2c_quirks, + .supports_bus_clear = false, + .has_apb_dma = true, + .tlow_std_mode = 0x4, + .thigh_std_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, + .setup_hold_time_std_mode = 0x0, + .setup_hold_time_fast_mode = 0x0, + .setup_hold_time_fastplus_mode = 0x0, + .setup_hold_time_hs_mode = 0x0, + .has_interface_timing_reg = false, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DVC, + .regs = &tegra20_dvc_i2c_regs, +}; +#endif + static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_continue_xfer_support = true, .has_per_pkt_xfer_complete_irq = false, @@ -1516,12 +1831,19 @@ static const struct tegra_i2c_hw_feature tegra30_i2c_hw = { .has_apb_dma = true, .tlow_std_mode = 0x4, .thigh_std_mode = 0x2, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0x0, - .setup_hold_time_fast_fast_plus_mode = 0x0, + .setup_hold_time_fast_mode = 0x0, + .setup_hold_time_fastplus_mode = 0x0, .setup_hold_time_hs_mode = 0x0, .has_interface_timing_reg = false, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, }; static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { @@ -1541,12 +1863,19 @@ static const struct tegra_i2c_hw_feature tegra114_i2c_hw = { .has_apb_dma = true, .tlow_std_mode = 0x4, .thigh_std_mode = 0x2, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + 
.thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0x0, - .setup_hold_time_fast_fast_plus_mode = 0x0, + .setup_hold_time_fast_mode = 0x0, + .setup_hold_time_fastplus_mode = 0x0, .setup_hold_time_hs_mode = 0x0, .has_interface_timing_reg = false, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, }; static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { @@ -1566,12 +1895,19 @@ static const struct tegra_i2c_hw_feature tegra124_i2c_hw = { .has_apb_dma = true, .tlow_std_mode = 0x4, .thigh_std_mode = 0x2, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0x0, - .setup_hold_time_fast_fast_plus_mode = 0x0, + .setup_hold_time_fast_mode = 0x0, + .setup_hold_time_fastplus_mode = 0x0, .setup_hold_time_hs_mode = 0x0, .has_interface_timing_reg = true, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, }; static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { @@ -1591,13 +1927,54 @@ static const struct tegra_i2c_hw_feature tegra210_i2c_hw = { .has_apb_dma = true, .tlow_std_mode = 0x4, .thigh_std_mode = 0x2, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, + .setup_hold_time_std_mode = 0, + .setup_hold_time_fast_mode = 0, + .setup_hold_time_fastplus_mode = 0, + .setup_hold_time_hs_mode = 0, + .has_interface_timing_reg = true, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, +}; + +#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +static const struct tegra_i2c_hw_feature tegra210_vi_i2c_hw = { + .has_continue_xfer_support = true, + .has_per_pkt_xfer_complete_irq = true, + .clk_divisor_hs_mode = 1, + .clk_divisor_std_mode = 0x19, + .clk_divisor_fast_mode = 0x19, + .clk_divisor_fast_plus_mode = 0x10, + .has_config_load_reg = true, + .has_multi_master_mode = false, + .has_slcg_override_reg = true, + .has_mst_fifo = false, + .has_mst_reset = false, + .quirks = &tegra_i2c_quirks, + .supports_bus_clear = true, + .has_apb_dma = true, + .tlow_std_mode = 0x4, + .thigh_std_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0, - .setup_hold_time_fast_fast_plus_mode = 0, + .setup_hold_time_fast_mode = 0, + .setup_hold_time_fastplus_mode = 0, .setup_hold_time_hs_mode = 0, .has_interface_timing_reg = true, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_VI, + .regs = &tegra210_vi_i2c_regs, }; +#endif static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_continue_xfer_support = true, @@ -1616,12 +1993,19 @@ static const struct tegra_i2c_hw_feature tegra186_i2c_hw = { .has_apb_dma = false, .tlow_std_mode = 0x4, .thigh_std_mode = 0x3, - .tlow_fast_fastplus_mode = 0x4, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x2, .setup_hold_time_std_mode = 0, - .setup_hold_time_fast_fast_plus_mode = 0, + .setup_hold_time_fast_mode = 0, + .setup_hold_time_fastplus_mode = 0, .setup_hold_time_hs_mode = 0, 
.has_interface_timing_reg = true, + .enable_hs_mode_support = false, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, }; static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { @@ -1641,19 +2025,132 @@ static const struct tegra_i2c_hw_feature tegra194_i2c_hw = { .has_apb_dma = false, .tlow_std_mode = 0x8, .thigh_std_mode = 0x7, - .tlow_fast_fastplus_mode = 0x2, - .thigh_fast_fastplus_mode = 0x2, + .tlow_fast_mode = 0x2, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x2, + .thigh_fastplus_mode = 0x2, + .tlow_hs_mode = 0x8, + .thigh_hs_mode = 0x3, + .setup_hold_time_std_mode = 0x08080808, + .setup_hold_time_fast_mode = 0x02020202, + .setup_hold_time_fastplus_mode = 0x02020202, + .setup_hold_time_hs_mode = 0x090909, + .has_interface_timing_reg = true, + .enable_hs_mode_support = true, + .has_mutex = false, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, +}; + +static const struct tegra_i2c_hw_feature tegra256_i2c_hw = { + .has_continue_xfer_support = true, + .has_per_pkt_xfer_complete_irq = true, + .clk_divisor_hs_mode = 9, + .clk_divisor_std_mode = 0x7a, + .clk_divisor_fast_mode = 0x40, + .clk_divisor_fast_plus_mode = 0x14, + .has_config_load_reg = true, + .has_multi_master_mode = true, + .has_slcg_override_reg = true, + .has_mst_fifo = true, + .has_mst_reset = true, + .quirks = &tegra194_i2c_quirks, + .supports_bus_clear = true, + .has_apb_dma = false, + .tlow_std_mode = 0x8, + .thigh_std_mode = 0x7, + .tlow_fast_mode = 0x4, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x4, + .thigh_fastplus_mode = 0x4, + .tlow_hs_mode = 0x3, + .thigh_hs_mode = 0x2, + .setup_hold_time_std_mode = 0x08080808, + .setup_hold_time_fast_mode = 0x04010101, + .setup_hold_time_fastplus_mode = 0x04020202, + .setup_hold_time_hs_mode = 0x030303, + .has_interface_timing_reg = true, + .enable_hs_mode_support = true, + .has_mutex = true, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra20_i2c_regs, +}; + +static const struct tegra_i2c_hw_feature tegra264_i2c_hw = { + .has_continue_xfer_support = true, + .has_per_pkt_xfer_complete_irq = true, + .clk_divisor_hs_mode = 1, + .clk_divisor_std_mode = 0x1d, + .clk_divisor_fast_mode = 0x15, + .clk_divisor_fast_plus_mode = 0x8, + .has_config_load_reg = true, + .has_multi_master_mode = true, + .has_slcg_override_reg = true, + .has_mst_fifo = true, + .has_mst_reset = true, + .quirks = &tegra194_i2c_quirks, + .supports_bus_clear = true, + .has_apb_dma = false, + .tlow_std_mode = 0x8, + .thigh_std_mode = 0x7, + .tlow_fast_mode = 0x2, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x2, + .thigh_fastplus_mode = 0x2, + .tlow_hs_mode = 0x4, + .thigh_hs_mode = 0x2, .setup_hold_time_std_mode = 0x08080808, - .setup_hold_time_fast_fast_plus_mode = 0x02020202, + .setup_hold_time_fast_mode = 0x02020202, + .setup_hold_time_fastplus_mode = 0x02020202, .setup_hold_time_hs_mode = 0x090909, .has_interface_timing_reg = true, + .enable_hs_mode_support = true, + .has_mutex = true, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra264_i2c_regs, +}; + +static const struct tegra_i2c_hw_feature tegra410_i2c_hw = { + .has_continue_xfer_support = true, + .has_per_pkt_xfer_complete_irq = true, + .clk_divisor_hs_mode = 1, + .clk_divisor_std_mode = 0x3f, + .clk_divisor_fast_mode = 0x2c, + .clk_divisor_fast_plus_mode = 0x11, + .has_config_load_reg = true, + .has_multi_master_mode = true, + .has_slcg_override_reg = true, + .has_mst_fifo = true, + .has_mst_reset = true, + .quirks = &tegra194_i2c_quirks, + 
.supports_bus_clear = true, + .has_apb_dma = false, + .tlow_std_mode = 0x8, + .thigh_std_mode = 0x7, + .tlow_fast_mode = 0x2, + .thigh_fast_mode = 0x2, + .tlow_fastplus_mode = 0x2, + .thigh_fastplus_mode = 0x2, + .tlow_hs_mode = 0x8, + .thigh_hs_mode = 0x6, + .setup_hold_time_std_mode = 0x08080808, + .setup_hold_time_fast_mode = 0x02020202, + .setup_hold_time_fastplus_mode = 0x02020202, + .setup_hold_time_hs_mode = 0x0b0b0b, + .has_interface_timing_reg = true, + .enable_hs_mode_support = true, + .has_mutex = true, + .variant = TEGRA_I2C_VARIANT_DEFAULT, + .regs = &tegra410_i2c_regs, }; static const struct of_device_id tegra_i2c_of_match[] = { + { .compatible = "nvidia,tegra264-i2c", .data = &tegra264_i2c_hw, }, + { .compatible = "nvidia,tegra256-i2c", .data = &tegra256_i2c_hw, }, { .compatible = "nvidia,tegra194-i2c", .data = &tegra194_i2c_hw, }, { .compatible = "nvidia,tegra186-i2c", .data = &tegra186_i2c_hw, }, #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) - { .compatible = "nvidia,tegra210-i2c-vi", .data = &tegra210_i2c_hw, }, + { .compatible = "nvidia,tegra210-i2c-vi", .data = &tegra210_vi_i2c_hw, }, #endif { .compatible = "nvidia,tegra210-i2c", .data = &tegra210_i2c_hw, }, { .compatible = "nvidia,tegra124-i2c", .data = &tegra124_i2c_hw, }, @@ -1661,7 +2158,7 @@ static const struct of_device_id tegra_i2c_of_match[] = { { .compatible = "nvidia,tegra30-i2c", .data = &tegra30_i2c_hw, }, { .compatible = "nvidia,tegra20-i2c", .data = &tegra20_i2c_hw, }, #if IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) - { .compatible = "nvidia,tegra20-i2c-dvc", .data = &tegra20_i2c_hw, }, + { .compatible = "nvidia,tegra20-i2c-dvc", .data = &tegra20_dvc_i2c_hw, }, #endif {}, }; @@ -1669,21 +2166,12 @@ MODULE_DEVICE_TABLE(of, tegra_i2c_of_match); static void tegra_i2c_parse_dt(struct tegra_i2c_dev *i2c_dev) { - struct device_node *np = i2c_dev->dev->of_node; bool multi_mode; i2c_parse_fw_timings(i2c_dev->dev, &i2c_dev->timings, true); multi_mode = device_property_read_bool(i2c_dev->dev, "multi-master"); i2c_dev->multimaster_mode = multi_mode; - - if (IS_ENABLED(CONFIG_ARCH_TEGRA_2x_SOC) && - of_device_is_compatible(np, "nvidia,tegra20-i2c-dvc")) - i2c_dev->is_dvc = true; - - if (IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) && - of_device_is_compatible(np, "nvidia,tegra210-i2c-vi")) - i2c_dev->is_vi = true; } static int tegra_i2c_init_clocks(struct tegra_i2c_dev *i2c_dev) @@ -1966,6 +2454,7 @@ static const struct acpi_device_id tegra_i2c_acpi_match[] = { {.id = "NVDA0101", .driver_data = (kernel_ulong_t)&tegra210_i2c_hw}, {.id = "NVDA0201", .driver_data = (kernel_ulong_t)&tegra186_i2c_hw}, {.id = "NVDA0301", .driver_data = (kernel_ulong_t)&tegra194_i2c_hw}, + {.id = "NVDA2017", .driver_data = (kernel_ulong_t)&tegra410_i2c_hw}, { } }; MODULE_DEVICE_TABLE(acpi, tegra_i2c_acpi_match); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 67a18e437f831..4482057d6741f 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -14,9 +14,12 @@ #include #include #include +#include #include #include #include +#include +#include #include "internals.h" @@ -466,7 +469,7 @@ static void i3c_bus_cleanup(struct i3c_bus *i3cbus) mutex_unlock(&i3c_core_lock); } -static int i3c_bus_init(struct i3c_bus *i3cbus, struct device_node *np) +static int i3c_bus_init(struct i3c_bus *i3cbus, struct fwnode_handle *fwnode) { int ret, start, end, id = -1; @@ -476,8 +479,8 @@ static int i3c_bus_init(struct i3c_bus *i3cbus, struct device_node *np) i3c_bus_init_addrslots(i3cbus); i3cbus->mode = I3C_BUS_MODE_PURE; - if (np) - id = of_alias_get_id(np, 
"i3c"); + if (fwnode && is_of_node(fwnode)) + id = of_alias_get_id(to_of_node(fwnode), "i3c"); mutex_lock(&i3c_core_lock); if (id >= 0) { @@ -710,7 +713,7 @@ static void i3c_masterdev_release(struct device *dev) WARN_ON(!list_empty(&bus->devs.i2c) || !list_empty(&bus->devs.i3c)); i3c_bus_cleanup(bus); - of_node_put(dev->of_node); + fwnode_handle_put(dev->fwnode); } static const struct device_type i3c_masterdev_type = { @@ -894,7 +897,7 @@ static void i3c_device_release(struct device *dev) WARN_ON(i3cdev->desc); - of_node_put(i3cdev->dev.of_node); + fwnode_handle_put(dev->fwnode); kfree(i3cdev); } @@ -945,6 +948,43 @@ static int i3c_master_rstdaa_locked(struct i3c_master_controller *master, return ret; } +/** + * i3c_master_setaasa_locked() - start a SETAASA procedure (Set All Addresses to Static Address) + * @master: I3C master object + * + * Send a SETAASA CCC command to set all attached I3C devices' dynamic addresses to + * their static address. + * + * This function must be called with the bus lock held in write mode. + * + * First, the SETHID CCC command is sent, followed by the SETAASA CCC. + * + * Return: 0 in case of success, a positive I3C error code if the error is + * one of the official Mx error codes, and a negative error code otherwise. + */ +static int i3c_master_setaasa_locked(struct i3c_master_controller *master) +{ + struct i3c_ccc_cmd_dest dest; + struct i3c_ccc_cmd cmd; + int ret; + + /* Send SETHID CCC command */ + i3c_ccc_cmd_dest_init(&dest, I3C_BROADCAST_ADDR, 0); + i3c_ccc_cmd_init(&cmd, false, I3C_CCC_VENDOR(0, true), &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + if (ret) + return ret; + + /* Send SETAASA CCC command */ + i3c_ccc_cmd_dest_init(&dest, I3C_BROADCAST_ADDR, 0); + i3c_ccc_cmd_init(&cmd, false, I3C_CCC_SETAASA, &dest, 1); + ret = i3c_master_send_ccc_cmd_locked(master, &cmd); + i3c_ccc_cmd_dest_cleanup(&dest); + + return ret; +} + /** * i3c_master_entdaa_locked() - start a DAA (Dynamic Address Assignment) * procedure @@ -1629,6 +1669,18 @@ static int i3c_master_early_i3c_dev_add(struct i3c_master_controller *master, if (ret) goto err_free_dev; + /* + * For devices using SETAASA instead of ENTDAA, the address is statically + * assigned. Update the dynamic address to the provided static address. + * Reattaching the I3C device is not useful. It is also not mandatory + * for such devices to implement CCC commands like GETPID, GETDCR etc. + * Hence, we can return here. + */ + if (i3cdev->boardinfo->static_addr_method & BIT(1)) { + i3cdev->info.dyn_addr = i3cdev->boardinfo->static_addr; + return 0; + } + ret = i3c_master_setdasa_locked(master, i3cdev->info.static_addr, i3cdev->boardinfo->init_dyn_addr); if (ret) @@ -1678,11 +1730,20 @@ i3c_master_register_new_i3c_devs(struct i3c_master_controller *master) desc->dev->dev.type = &i3c_device_type; desc->dev->dev.bus = &i3c_bus_type; desc->dev->dev.release = i3c_device_release; - dev_set_name(&desc->dev->dev, "%d-%llx", master->bus.id, - desc->info.pid); + + /* + * For devices without PID (e.g., SETAASA devices), use + * static address for naming instead. 
+ */ + if (desc->info.pid) + dev_set_name(&desc->dev->dev, "%d-%llx", master->bus.id, + desc->info.pid); + else + dev_set_name(&desc->dev->dev, "%d-sa%02x", master->bus.id, + desc->info.static_addr); if (desc->boardinfo) - desc->dev->dev.of_node = desc->boardinfo->of_node; + device_set_node(&desc->dev->dev, desc->boardinfo->fwnode); ret = device_register(&desc->dev->dev); if (ret) { @@ -1924,6 +1985,12 @@ static int i3c_master_bus_init(struct i3c_master_controller *master) goto err_bus_cleanup; } + if (master->addr_method & BIT(1)) { + ret = i3c_master_setaasa_locked(master); + if (ret) + goto err_bus_cleanup; + } + /* Disable all slave events before starting DAA. */ ret = i3c_master_disec_locked(master, I3C_BROADCAST_ADDR, I3C_CCC_EVENT_SIR | I3C_CCC_EVENT_MR | @@ -2005,8 +2072,18 @@ static void i3c_master_attach_boardinfo(struct i3c_dev_desc *i3cdev) struct i3c_dev_boardinfo *i3cboardinfo; list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) { - if (i3cdev->info.pid != i3cboardinfo->pid) - continue; + /* + * For devices without PID (e.g., SETAASA devices), match by + * static address. For devices with PID, match by PID. + */ + if (i3cboardinfo->pid) { + if (i3cdev->info.pid != i3cboardinfo->pid) + continue; + } else { + if (!i3cboardinfo->static_addr || + i3cdev->info.static_addr != i3cboardinfo->static_addr) + continue; + } i3cdev->boardinfo = i3cboardinfo; i3cdev->info.static_addr = i3cboardinfo->static_addr; @@ -2020,8 +2097,12 @@ i3c_master_search_i3c_dev_duplicate(struct i3c_dev_desc *refdev) struct i3c_master_controller *master = i3c_dev_get_master(refdev); struct i3c_dev_desc *i3cdev; + if (!refdev->info.pid) + return NULL; + i3c_bus_for_each_i3cdev(&master->bus, i3cdev) { - if (i3cdev != refdev && i3cdev->info.pid == refdev->info.pid) + if (i3cdev != refdev && i3cdev->info.pid && + i3cdev->info.pid == refdev->info.pid) return i3cdev; } @@ -2187,21 +2268,58 @@ EXPORT_SYMBOL_GPL(i3c_master_add_i3c_dev_locked); #define OF_I3C_REG1_IS_I2C_DEV BIT(31) +static int i3c_acpi_get_i2c_resource(struct acpi_resource *ares, void *data) +{ + struct i2c_dev_boardinfo *boardinfo = data; + struct acpi_resource_i2c_serialbus *sb; + + if (!i2c_acpi_get_i2c_resource(ares, &sb)) + return 1; + + boardinfo->base.addr = sb->slave_address; + if (sb->access_mode == ACPI_I2C_10BIT_MODE) + boardinfo->base.flags |= I2C_CLIENT_TEN; + + boardinfo->lvr = sb->lvr; + + return 1; +} + static int -of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, - struct device_node *node, u32 *reg) +i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, + struct fwnode_handle *fwnode, u32 *reg) { struct i2c_dev_boardinfo *boardinfo; struct device *dev = &master->dev; - int ret; + struct acpi_device *adev; + LIST_HEAD(resources); + int ret = -EINVAL; boardinfo = devm_kzalloc(dev, sizeof(*boardinfo), GFP_KERNEL); if (!boardinfo) return -ENOMEM; - ret = of_i2c_get_board_info(dev, node, &boardinfo->base); - if (ret) - return ret; + if (is_of_node(fwnode)) { + ret = of_i2c_get_board_info(dev, to_of_node(fwnode), &boardinfo->base); + if (ret) + return ret; + + /* LVR is encoded in reg[2] for Device Tree. 
*/ + boardinfo->lvr = reg[2]; + } else if (is_acpi_device_node(fwnode)) { + adev = to_acpi_device_node(fwnode); + if (!adev) + return -ENODEV; + + boardinfo->base.fwnode = acpi_fwnode_handle(adev); + ret = acpi_dev_get_resources(adev, &resources, + i3c_acpi_get_i2c_resource, boardinfo); + + if (ACPI_FAILURE(ret) || !boardinfo->base.addr) + return -EINVAL; + + acpi_dev_free_resource_list(&resources); + } /* * The I3C Specification does not clearly say I2C devices with 10-bit @@ -2213,23 +2331,20 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master, return -EOPNOTSUPP; } - /* LVR is encoded in reg[2]. */ - boardinfo->lvr = reg[2]; - list_add_tail(&boardinfo->node, &master->boardinfo.i2c); - of_node_get(node); + fwnode_handle_get(fwnode); return 0; } static int -of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, - struct device_node *node, u32 *reg) +i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, + struct fwnode_handle *fwnode, u32 *reg) { struct i3c_dev_boardinfo *boardinfo; struct device *dev = &master->dev; enum i3c_addr_slot_status addrstatus; - u32 init_dyn_addr = 0; + u32 init_dyn_addr = 0, static_addr_method = 0; boardinfo = devm_kzalloc(dev, sizeof(*boardinfo), GFP_KERNEL); if (!boardinfo) @@ -2247,7 +2362,7 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, boardinfo->static_addr = reg[0]; - if (!of_property_read_u32(node, "assigned-address", &init_dyn_addr)) { + if (!fwnode_property_read_u32(fwnode, "assigned-address", &init_dyn_addr)) { if (init_dyn_addr > I3C_MAX_ADDR) return -EINVAL; @@ -2257,61 +2372,90 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master, return -EINVAL; } + if (!fwnode_property_read_u32(fwnode, "mipi-i3c-static-method", &static_addr_method)) + boardinfo->static_addr_method = static_addr_method; + + /* Update the address methods required for device discovery */ + master->addr_method |= boardinfo->static_addr_method; + boardinfo->pid = ((u64)reg[1] << 32) | reg[2]; - if ((boardinfo->pid & GENMASK_ULL(63, 48)) || - I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) - return -EINVAL; + /* Skip PID validation for SETAASA devices */ + if (!(boardinfo->static_addr_method & BIT(1))) { + if ((boardinfo->pid & GENMASK_ULL(63, 48)) || + I3C_PID_RND_LOWER_32BITS(boardinfo->pid)) + return -EINVAL; + } boardinfo->init_dyn_addr = init_dyn_addr; - boardinfo->of_node = of_node_get(node); + boardinfo->fwnode = fwnode_handle_get(fwnode); list_add_tail(&boardinfo->node, &master->boardinfo.i3c); return 0; } -static int of_i3c_master_add_dev(struct i3c_master_controller *master, - struct device_node *node) +static int i3c_master_add_of_dev(struct i3c_master_controller *master, + struct fwnode_handle *fwnode) { u32 reg[3]; int ret; - if (!master) - return -EINVAL; - - ret = of_property_read_u32_array(node, "reg", reg, ARRAY_SIZE(reg)); + ret = fwnode_property_read_u32_array(fwnode, "reg", reg, ARRAY_SIZE(reg)); if (ret) return ret; /* - * The manufacturer ID can't be 0. If reg[1] == 0 that means we're - * dealing with an I2C device. + * I3C device should have either the manufacturer ID specified or the + * address discovery method specified. Else treat it as an I2C device. 
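For reference, the PID handling above packs the two 32-bit 'reg' cells into one 64-bit value (reg[1] in the upper half, reg[2] in the lower half) and, for non-SETAASA devices, rejects it when any bit above the 48-bit PID field is set or when the lower 32 bits are flagged as random. A stand-alone sketch of that packing and of the 48-bit check, using a plain mask in place of the kernel's GENMASK_ULL()/I3C_PID_RND_LOWER_32BITS() helpers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example 'reg' triplet: <static addr, PID upper 32 bits, PID lower 32 bits>. */
	uint32_t reg[3] = { 0x29, 0x04cc, 0x0000aaaa };
	uint64_t pid = ((uint64_t)reg[1] << 32) | reg[2];	/* as in the code above */

	printf("pid = %#llx\n", (unsigned long long)pid);	/* 0x4cc0000aaaa */

	/* A PID is only 48 bits wide, so bits 63:48 must be zero. */
	if (pid & 0xffff000000000000ULL)
		printf("invalid PID: reserved bits set\n");
	else
		printf("PID fits in 48 bits\n");
	return 0;
}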
 */
-	if (!reg[1])
-		ret = of_i3c_master_add_i2c_boardinfo(master, node, reg);
+	if (!(reg[1] || fwnode_property_present(fwnode, "mipi-i3c-static-method")))
+		ret = i3c_master_add_i2c_boardinfo(master, fwnode, reg);
 	else
-		ret = of_i3c_master_add_i3c_boardinfo(master, node, reg);
+		ret = i3c_master_add_i3c_boardinfo(master, fwnode, reg);
 
 	return ret;
 }
 
-static int of_populate_i3c_bus(struct i3c_master_controller *master)
+static int i3c_master_add_acpi_dev(struct i3c_master_controller *master,
+				   struct fwnode_handle *fwnode)
+{
+	struct acpi_device *adev = to_acpi_device_node(fwnode);
+	u32 reg[3], adr;
+
+	/* I2C device on an I3C bus should not have _ADR property as per spec */
+	if (!acpi_has_method(adev->handle, "_ADR"))
+		return i3c_master_add_i2c_boardinfo(master, fwnode, reg);
+
+	adr = acpi_device_adr(adev);
+
+	/* _ADR will have the 48 bit PID of the device */
+	reg[1] = lower_32_bits(adr);
+	reg[2] = upper_32_bits(adr);
+
+	fwnode_property_read_u32(fwnode, "mipi-i3c-static-address", &reg[0]);
+
+	return i3c_master_add_i3c_boardinfo(master, fwnode, reg);
+}
+
+static int fwnode_populate_i3c_bus(struct i3c_master_controller *master)
 {
 	struct device *dev = &master->dev;
-	struct device_node *i3cbus_np = dev->of_node;
-	struct device_node *node;
-	int ret;
+	struct fwnode_handle *fwnode = dev_fwnode(dev);
+	struct fwnode_handle *child;
+	int ret = -ENODEV;
 	u32 val;
 
-	if (!i3cbus_np)
+	if (!fwnode)
 		return 0;
 
-	for_each_available_child_of_node(i3cbus_np, node) {
-		ret = of_i3c_master_add_dev(master, node);
-		if (ret) {
-			of_node_put(node);
+	fwnode_for_each_available_child_node(fwnode, child) {
+		if (is_of_node(child))
+			ret = i3c_master_add_of_dev(master, child);
+		else if (is_acpi_device_node(child))
+			ret = i3c_master_add_acpi_dev(master, child);
+
+		if (ret)
 			return ret;
-		}
 	}
 
 	/*
@@ -2319,10 +2463,10 @@ static int of_populate_i3c_bus(struct i3c_master_controller *master)
 	 * on the bus are not supporting typical rates, or if the bus topology
 	 * prevents it from using max possible rate.
 	 */
-	if (!of_property_read_u32(i3cbus_np, "i2c-scl-hz", &val))
+	if (!device_property_read_u32(dev, "i2c-scl-hz", &val))
 		master->bus.scl_rate.i2c = val;
 
-	if (!of_property_read_u32(i3cbus_np, "i3c-scl-hz", &val))
+	if (!device_property_read_u32(dev, "i3c-scl-hz", &val))
 		master->bus.scl_rate.i3c = val;
 
 	return 0;
@@ -2369,10 +2513,28 @@ static u8 i3c_master_i2c_get_lvr(struct i2c_client *client)
 {
 	/* Fall back to no spike filters and FM bus mode.
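In the ACPI path above, the 48-bit PID is carried in _ADR and split into the two PID 'reg' cells with the kernel's lower_32_bits()/upper_32_bits() helpers; the split itself is just shifting and masking, for example:

#include <stdint.h>
#include <stdio.h>

/* Open-coded equivalents of the kernel's lower_32_bits()/upper_32_bits(). */
static uint32_t lower_32(uint64_t v) { return (uint32_t)(v & 0xffffffffULL); }
static uint32_t upper_32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t adr = 0x04cc0000aaaaULL;	/* made-up 48-bit _ADR value */
	uint32_t reg[3] = { 0 };

	reg[1] = lower_32(adr);			/* 0x0000aaaa */
	reg[2] = upper_32(adr);			/* 0x000004cc */

	printf("reg[1] = %#x, reg[2] = %#x\n", reg[1], reg[2]);
	return 0;
}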
*/ u8 lvr = I3C_LVR_I2C_INDEX(2) | I3C_LVR_I2C_FM_MODE; + struct i2c_dev_boardinfo boardinfo; + struct acpi_device *adev; + LIST_HEAD(resources); u32 reg[3]; - if (!of_property_read_u32_array(client->dev.of_node, "reg", reg, ARRAY_SIZE(reg))) - lvr = reg[2]; + if (is_of_node(client->dev.fwnode)) { + if (!fwnode_property_read_u32_array(client->dev.fwnode, "reg", + reg, ARRAY_SIZE(reg))) + lvr = reg[2]; + } else if (is_acpi_device_node(client->dev.fwnode)) { + adev = to_acpi_device_node(client->dev.fwnode); + if (adev) { + memset(&boardinfo, 0, sizeof(boardinfo)); + acpi_dev_get_resources(adev, &resources, + i3c_acpi_get_i2c_resource, &boardinfo); + + if (boardinfo.base.addr) + lvr = boardinfo.lvr; + + acpi_dev_free_resource_list(&resources); + } + } return lvr; } @@ -2484,7 +2646,8 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) struct i2c_adapter *adap = i3c_master_to_i2c_adapter(master); struct i2c_dev_desc *i2cdev; struct i2c_dev_boardinfo *i2cboardinfo; - int ret, id; + struct fwnode_handle *fwnode = dev_fwnode(&master->dev); + int ret, id = -1; adap->dev.parent = master->dev.parent; adap->owner = master->dev.parent->driver->owner; @@ -2495,7 +2658,9 @@ static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master) adap->timeout = HZ; adap->retries = 3; - id = of_alias_get_id(master->dev.of_node, "i2c"); + if (fwnode && is_of_node(fwnode)) + id = of_alias_get_id(to_of_node(fwnode), "i2c"); + if (id >= 0) { adap->nr = id; ret = i2c_add_numbered_adapter(adap); @@ -2802,16 +2967,17 @@ int i3c_master_register(struct i3c_master_controller *master, return ret; master->dev.parent = parent; - master->dev.of_node = of_node_get(parent->of_node); + device_set_node(&master->dev, fwnode_handle_get(dev_fwnode(parent))); master->dev.bus = &i3c_bus_type; master->dev.type = &i3c_masterdev_type; master->dev.release = i3c_masterdev_release; master->ops = ops; master->secondary = secondary; + master->addr_method = BIT(0); INIT_LIST_HEAD(&master->boardinfo.i2c); INIT_LIST_HEAD(&master->boardinfo.i3c); - ret = i3c_bus_init(i3cbus, master->dev.of_node); + ret = i3c_bus_init(i3cbus, dev_fwnode(&master->dev)); if (ret) return ret; @@ -2822,7 +2988,7 @@ int i3c_master_register(struct i3c_master_controller *master, master->dev.coherent_dma_mask = parent->coherent_dma_mask; master->dev.dma_parms = parent->dma_parms; - ret = of_populate_i3c_bus(master); + ret = fwnode_populate_i3c_bus(master); if (ret) goto err_put_dev; @@ -3062,11 +3228,14 @@ static int __init i3c_init(void) { int res; - res = of_alias_get_highest_id("i3c"); - if (res >= 0) { - mutex_lock(&i3c_core_lock); - __i3c_first_dynamic_bus_num = res + 1; - mutex_unlock(&i3c_core_lock); + /* of_alias_get_highest_id is DT-specific, only call for DT systems */ + if (IS_ENABLED(CONFIG_OF)) { + res = of_alias_get_highest_id("i3c"); + if (res >= 0) { + mutex_lock(&i3c_core_lock); + __i3c_first_dynamic_bus_num = res + 1; + mutex_unlock(&i3c_core_lock); + } } res = bus_register_notifier(&i2c_bus_type, &i2cdev_notifier); diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index 9ceedf09c3b6a..a13ade81cae8d 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -283,6 +283,8 @@ static bool dw_i3c_master_supports_ccc_cmd(struct i3c_master_controller *m, case I3C_CCC_GETSTATUS: case I3C_CCC_GETMXDS: case I3C_CCC_GETHDRCAP: + case I3C_CCC_SETAASA: + case I3C_CCC_VENDOR(0, true): /* SETHID */ return true; default: return false; @@ -541,13 +543,20 @@ static 
void dw_i3c_master_set_intr_regs(struct dw_i3c_master *master) static int dw_i3c_clk_cfg(struct dw_i3c_master *master) { - unsigned long core_rate, core_period; + unsigned int core_rate, core_period; u32 scl_timing; u8 hcnt, lcnt; + int ret = 0; - core_rate = clk_get_rate(master->core_clk); - if (!core_rate) - return -EINVAL; + if (ACPI_HANDLE(master->dev)) { + ret = device_property_read_u32(master->dev, "clock-frequency", &core_rate); + if (ret) + return ret; + } else { + core_rate = clk_get_rate(master->core_clk); + if (!core_rate) + return -EINVAL; + } core_period = DIV_ROUND_UP(1000000000, core_rate); @@ -594,13 +603,20 @@ static int dw_i3c_clk_cfg(struct dw_i3c_master *master) static int dw_i2c_clk_cfg(struct dw_i3c_master *master) { - unsigned long core_rate, core_period; + unsigned int core_rate, core_period; u16 hcnt, lcnt; u32 scl_timing; + int ret = 0; - core_rate = clk_get_rate(master->core_clk); - if (!core_rate) - return -EINVAL; + if (ACPI_HANDLE(master->dev)) { + ret = device_property_read_u32(master->dev, "clock-frequency", &core_rate); + if (ret) + return ret; + } else { + core_rate = clk_get_rate(master->core_clk); + if (!core_rate) + return -EINVAL; + } core_period = DIV_ROUND_UP(1000000000, core_rate); @@ -1545,20 +1561,22 @@ int dw_i3c_common_probe(struct dw_i3c_master *master, if (IS_ERR(master->regs)) return PTR_ERR(master->regs); - master->core_clk = devm_clk_get_enabled(&pdev->dev, NULL); - if (IS_ERR(master->core_clk)) - return PTR_ERR(master->core_clk); + if (!ACPI_HANDLE(&pdev->dev)) { + master->core_clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(master->core_clk)) + return PTR_ERR(master->core_clk); - master->pclk = devm_clk_get_optional_enabled(&pdev->dev, "pclk"); - if (IS_ERR(master->pclk)) - return PTR_ERR(master->pclk); + master->pclk = devm_clk_get_optional_enabled(&pdev->dev, "pclk"); + if (IS_ERR(master->pclk)) + return PTR_ERR(master->pclk); - master->core_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - "core_rst"); - if (IS_ERR(master->core_rst)) - return PTR_ERR(master->core_rst); + master->core_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, + "core_rst"); + if (IS_ERR(master->core_rst)) + return PTR_ERR(master->core_rst); - reset_control_deassert(master->core_rst); + reset_control_deassert(master->core_rst); + } spin_lock_init(&master->xferqueue.lock); INIT_LIST_HEAD(&master->xferqueue.list); @@ -1765,11 +1783,12 @@ static const struct of_device_id dw_i3c_master_of_match[] = { }; MODULE_DEVICE_TABLE(of, dw_i3c_master_of_match); -static const struct acpi_device_id amd_i3c_device_match[] = { +static const struct acpi_device_id dw_i3c_master_acpi_match[] = { { "AMDI0015", AMD_I3C_OD_PP_TIMING }, + { "NVDA2018" }, { } }; -MODULE_DEVICE_TABLE(acpi, amd_i3c_device_match); +MODULE_DEVICE_TABLE(acpi, dw_i3c_master_acpi_match); static struct platform_driver dw_i3c_driver = { .probe = dw_i3c_probe, @@ -1778,7 +1797,7 @@ static struct platform_driver dw_i3c_driver = { .driver = { .name = "dw-i3c-master", .of_match_table = dw_i3c_master_of_match, - .acpi_match_table = amd_i3c_device_match, + .acpi_match_table = dw_i3c_master_acpi_match, .pm = &dw_i3c_pm_ops, }, }; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6a25da00cfc37..b6466a261ffb6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -3671,13 +3672,105 @@ static int 
arm_smmu_def_domain_type(struct device *dev) if (IS_HISI_PTT_DEVICE(pdev)) return IOMMU_DOMAIN_IDENTITY; - if (pdev->vendor == PCI_VENDOR_ID_NVIDIA && pdev->device == 0x2E12) + if (pdev->vendor == PCI_VENDOR_ID_NVIDIA && + (pdev->device == 0x2E12 || pdev->device == 0x2E2A || + pdev->device == 0x2E2B)) return IOMMU_DOMAIN_DMA; } return 0; } +static int arm_smmu_group_set_mpam(struct iommu_group *group, u16 partid, + u8 pmg) +{ + int i; + u32 sid; + unsigned long flags; + struct arm_smmu_ste *step; + struct iommu_domain *domain; + struct arm_smmu_device *smmu; + struct arm_smmu_master *master; + struct arm_smmu_cmdq_batch cmds; + struct arm_smmu_domain *smmu_domain; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_CFGI_STE, + .cfgi = { + .leaf = true, + }, + }; + struct arm_smmu_master_domain *master_domain; + + domain = iommu_get_domain_for_group(group); + smmu_domain = to_smmu_domain(domain); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + return -EIO; + smmu = smmu_domain->smmu; + + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + master = master_domain->master; + + for (i = 0; i < master->num_streams; i++) { + sid = master->streams[i].id; + step = arm_smmu_get_step_for_sid(smmu, sid); + + /* These need locking if the VMSPtr is ever used */ + step->data[4] = FIELD_PREP(STRTAB_STE_4_PARTID, partid); + step->data[5] = FIELD_PREP(STRTAB_STE_5_PMG, pmg); + + cmd.cfgi.sid = sid; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); + } + + master->partid = partid; + master->pmg = pmg; + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_cmdq_batch_submit(smmu, &cmds); + + return 0; +} + +static int arm_smmu_group_get_mpam(struct iommu_group *group, u16 *partid, + u8 *pmg) +{ + int err = -EINVAL; + unsigned long flags; + struct iommu_domain *domain; + struct arm_smmu_master *master; + struct arm_smmu_domain *smmu_domain; + struct arm_smmu_master_domain *master_domain; + + domain = iommu_get_domain_for_group(group); + smmu_domain = to_smmu_domain(domain); + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + return -EIO; + + if (!partid && !pmg) + return 0; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + master = master_domain->master; + if (master) { + if (partid) + *partid = master->partid; + if (pmg) + *pmg = master->pmg; + err = 0; + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + return err; +} + static const struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, @@ -3690,6 +3783,8 @@ static const struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, + .get_group_qos_params = arm_smmu_group_get_mpam, + .set_group_qos_params = arm_smmu_group_set_mpam, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .get_viommu_size = arm_smmu_get_viommu_size, @@ -4307,6 +4402,29 @@ static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) hw_features, fw_features); } +static void arm_smmu_mpam_register_smmu(struct arm_smmu_device *smmu) +{ + u16 partid_max; + u8 pmg_max; + u32 reg; + + if (!IS_ENABLED(CONFIG_ARM64_MPAM)) + return; + + if (!(smmu->features & ARM_SMMU_FEAT_MPAM)) + return; + + reg = 
readl_relaxed(smmu->base + ARM_SMMU_MPAMIDR); + if (!reg) + return; + + partid_max = FIELD_GET(SMMU_MPAMIDR_PARTID_MAX, reg); + pmg_max = FIELD_GET(SMMU_MPAMIDR_PMG_MAX, reg); + + if (mpam_register_requestor(partid_max, pmg_max)) + smmu->features &= ~ARM_SMMU_FEAT_MPAM; +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -4460,6 +4578,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; if (FIELD_GET(IDR3_FWB, reg)) smmu->features |= ARM_SMMU_FEAT_S2FWB; + if (FIELD_GET(IDR3_MPAM, reg)) + smmu->features |= ARM_SMMU_FEAT_MPAM; if (FIELD_GET(IDR3_BBM, reg) == 2) smmu->features |= ARM_SMMU_FEAT_BBML2; @@ -4527,6 +4647,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (arm_smmu_sva_supported(smmu)) smmu->features |= ARM_SMMU_FEAT_SVA; + arm_smmu_mpam_register_smmu(smmu); + dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", smmu->ias, smmu->oas, smmu->features); return 0; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ae23aacc38402..09a9c77d9140f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -59,6 +59,7 @@ struct arm_vsmmu; #define IDR1_SIDSIZE GENMASK(5, 0) #define ARM_SMMU_IDR3 0xc +#define IDR3_MPAM (1 << 7) #define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) #define IDR3_BBM GENMASK(12, 11) @@ -170,6 +171,10 @@ struct arm_vsmmu; #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc +#define ARM_SMMU_MPAMIDR 0x130 +#define SMMU_MPAMIDR_PARTID_MAX GENMASK(15, 0) +#define SMMU_MPAMIDR_PMG_MAX GENMASK(23, 16) + #define ARM_SMMU_REG_SZ 0xe00 /* Common MSI config fields */ @@ -271,6 +276,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_1_MEV (1UL << 19) #define STRTAB_STE_1_S2FWB (1UL << 25) #define STRTAB_STE_1_S1STALLD (1UL << 27) +#define STRTAB_STE_1_S1MPAM (1UL << 26) #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) #define STRTAB_STE_1_EATS_ABT 0UL @@ -301,6 +307,10 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid) #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) +#define STRTAB_STE_4_PARTID GENMASK_ULL(31, 16) + +#define STRTAB_STE_5_PMG GENMASK_ULL(7, 0) + /* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */ #define STRTAB_STE_0_NESTING_ALLOWED \ cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \ @@ -768,6 +778,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HD (1 << 22) #define ARM_SMMU_FEAT_S2FWB (1 << 23) #define ARM_SMMU_FEAT_BBML2 (1 << 24) +#define ARM_SMMU_FEAT_MPAM (1 << 25) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -852,6 +863,8 @@ struct arm_smmu_master { bool stall_enabled; unsigned int ssid_bits; unsigned int iopf_refcount; + u16 partid; + u8 pmg; }; /* SMMU private data for an IOMMU domain */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 59244c744eabd..db770b73e3a8f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1087,6 +1087,45 @@ struct iommu_group *iommu_group_alloc(void) } EXPORT_SYMBOL_GPL(iommu_group_alloc); +struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj) +{ + struct iommu_group *group; + + if (!iommu_group_kset || !group_kobj) + return NULL; + + group = container_of(group_kobj, struct iommu_group, kobj); + + kobject_get(group->devices_kobj); + kobject_put(&group->kobj); + + return group; +} + +struct iommu_group *iommu_group_get_by_id(int 
id) +{ + struct kobject *group_kobj; + const char *name; + + if (!iommu_group_kset) + return NULL; + + name = kasprintf(GFP_KERNEL, "%d", id); + if (!name) + return NULL; + + group_kobj = kset_find_obj(iommu_group_kset, name); + kfree(name); + + return iommu_group_get_from_kobj(group_kobj); +} +EXPORT_SYMBOL_GPL(iommu_group_get_by_id); + +struct kset *iommu_get_group_kset(void) +{ + return kset_get(iommu_group_kset); +} + /** * iommu_group_get_iommudata - retrieve iommu_data registered for a group * @group: the group @@ -2207,6 +2246,12 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev); +struct iommu_domain *iommu_get_domain_for_group(struct iommu_group *group) +{ + return group->domain; +} +EXPORT_SYMBOL_GPL(iommu_get_domain_for_group); + /* * For IOMMU_DOMAIN_DMA implementations which already provide their own * guarantees that the group and its default domain are valid and correct. @@ -3855,3 +3900,79 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return ret; } #endif /* CONFIG_IRQ_MSI_IOMMU */ + +/* + * iommu_group_set_qos_params() - Set the QoS parameters for a group + * @group: the iommu group. + * @partition: the partition label all traffic from the group should use. + * @perf_mon_grp: the performance label all traffic from the group should use. + * + * Return: 0 on success, or an error. + */ +int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp) +{ + const struct iommu_ops *ops; + struct group_device *device; + int ret; + + mutex_lock(&group->mutex); + device = list_first_entry_or_null(&group->devices, typeof(*device), + list); + if (!device) { + ret = -ENODEV; + goto out_unlock; + } + + ops = dev_iommu_ops(device->dev); + if (!ops->set_group_qos_params) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = ops->set_group_qos_params(group, partition, perf_mon_grp); + +out_unlock: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_group_set_qos_params, "IOMMUFD_INTERNAL"); + +/* + * iommu_group_get_qos_params() - Get the QoS parameters for a group + * @group: the iommu group. + * @partition: the partition label all traffic from the group uses. + * @perf_mon_grp: the performance label all traffic from the group uses. + * + * Return: 0 on success, or an error. 
+ */ +int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp) +{ + const struct iommu_ops *ops; + struct group_device *device; + int ret; + + mutex_lock(&group->mutex); + device = list_first_entry_or_null(&group->devices, typeof(*device), + list); + if (!device) { + ret = -ENODEV; + goto out_unlock; + } + + ops = dev_iommu_ops(device->dev); + if (!ops->get_group_qos_params) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + + ret = ops->get_group_qos_params(group, partition, perf_mon_grp); + +out_unlock: + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_group_get_qos_params, "IOMMUFD_INTERNAL"); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c index 1921741f7311d..18b08277d2e1a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c @@ -15,6 +15,7 @@ #include "aq_hw.h" #include "aq_nic.h" +#include "hw_atl/hw_atl_llh.h" void aq_hw_write_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, u32 shift, u32 val) @@ -81,6 +82,27 @@ void aq_hw_write_reg64(struct aq_hw_s *hw, u32 reg, u64 value) lo_hi_writeq(value, hw->mmio + reg); } +int aq_hw_invalidate_descriptor_cache(struct aq_hw_s *hw) +{ + int err; + u32 val; + + /* Invalidate Descriptor Cache to prevent writing to the cached + * descriptors and to the data pointer of those descriptors + */ + hw_atl_rdm_rx_dma_desc_cache_init_tgl(hw); + + err = aq_hw_err_from_flags(hw); + if (err) + goto err_exit; + + readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get, + hw, val, val == 1, 1000U, 10000U); + +err_exit: + return err; +} + int aq_hw_err_from_flags(struct aq_hw_s *hw) { int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h index ffa6e4067c211..d89c63d88e4a4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h @@ -35,6 +35,7 @@ u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg); void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value); u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg); void aq_hw_write_reg64(struct aq_hw_s *hw, u32 reg, u64 value); +int aq_hw_invalidate_descriptor_cache(struct aq_hw_s *hw); int aq_hw_err_from_flags(struct aq_hw_s *hw); int aq_hw_num_tcs(struct aq_hw_s *hw); int aq_hw_q_per_tc(struct aq_hw_s *hw); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 493432d036b9a..c7895bfb2ecf8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1198,26 +1198,9 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) static int hw_atl_b0_hw_stop(struct aq_hw_s *self) { - int err; - u32 val; - hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK); - /* Invalidate Descriptor Cache to prevent writing to the cached - * descriptors and to the data pointer of those descriptors - */ - hw_atl_rdm_rx_dma_desc_cache_init_tgl(self); - - err = aq_hw_err_from_flags(self); - - if (err) - goto err_exit; - - readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get, - self, val, val == 1, 1000U, 10000U); - -err_exit: - return err; + return aq_hw_invalidate_descriptor_cache(self); } int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring) diff --git 
a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index b0ed572e88c67..0ce9caae8799c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -759,7 +759,7 @@ static int hw_atl2_hw_stop(struct aq_hw_s *self) { hw_atl_b0_hw_irq_disable(self, HW_ATL2_INT_MASK); - return 0; + return aq_hw_invalidate_descriptor_cache(self); } static struct aq_stats_s *hw_atl2_utils_get_hw_stats(struct aq_hw_s *self) diff --git a/drivers/net/ethernet/realtek/r8127/r8127_n.c b/drivers/net/ethernet/realtek/r8127/r8127_n.c index 2c9c262abe2ea..9e39016ea2c80 100755 --- a/drivers/net/ethernet/realtek/r8127/r8127_n.c +++ b/drivers/net/ethernet/realtek/r8127/r8127_n.c @@ -1216,39 +1216,6 @@ static int proc_get_registers(struct seq_file *m, void *v) return 0; } -static int proc_get_all_registers(struct seq_file *m, void *v) -{ - struct net_device *dev = m->private; - int i, n, max; - u8 byte_rd; - struct rtl8127_private *tp = netdev_priv(dev); - void __iomem *ioaddr = tp->mmio_addr; - struct pci_dev *pdev = tp->pci_dev; - - seq_puts(m, "\nDump All MAC Registers\n"); - seq_puts(m, "Offset\tValue\n------\t-----\n"); - - max = pci_resource_len(pdev, 2); - - for (n = 0; n < max;) { - seq_printf(m, "\n0x%04x:\t", n); - - rtnl_lock(); - - for (i = 0; i < 16 && n < max; i++, n++) { - byte_rd = readb(ioaddr + n); - seq_printf(m, "%02x ", byte_rd); - } - - rtnl_unlock(); - } - - seq_printf(m, "\nTotal length:0x%X", max); - - seq_putc(m, '\n'); - return 0; -} - static int proc_get_pcie_phy(struct seq_file *m, void *v) { struct net_device *dev = m->private; @@ -2143,49 +2110,6 @@ static int proc_get_registers(char *page, char **start, return len; } -static int proc_get_all_registers(char *page, char **start, - off_t offset, int count, - int *eof, void *data) -{ - struct net_device *dev = data; - int i, n, max; - u8 byte_rd; - struct rtl8127_private *tp = netdev_priv(dev); - void __iomem *ioaddr = tp->mmio_addr; - struct pci_dev *pdev = tp->pci_dev; - int len = 0; - - len += snprintf(page + len, count - len, - "\nDump All MAC Registers\n" - "Offset\tValue\n------\t-----\n"); - - max = pci_resource_len(pdev, 2); - - for (n = 0; n < max;) { - len += snprintf(page + len, count - len, - "\n0x%04x:\t", - n); - - rtnl_lock(); - - for (i = 0; i < 16 && n < max; i++, n++) { - byte_rd = readb(ioaddr + n); - len += snprintf(page + len, count - len, - "%02x ", - byte_rd); - } - - rtnl_unlock(); - } - - len += snprintf(page + len, count - len, "\nTotal length:0x%X", max); - - len += snprintf(page + len, count - len, "\n"); - - *eof = 1; - return len; -} - static int proc_get_pcie_phy(char *page, char **start, off_t offset, int count, int *eof, void *data) @@ -2784,7 +2708,6 @@ static const struct rtl8127_proc_file rtl8127_debug_proc_files[] = { { "driver_var", &proc_get_driver_variable }, { "tally", &proc_get_tally_counter }, { "registers", &proc_get_registers }, - { "registers2", &proc_get_all_registers }, { "pcie_phy", &proc_get_pcie_phy }, { "eth_phy", &proc_get_eth_phy }, { "ext_regs", &proc_get_extended_registers }, @@ -14375,7 +14298,7 @@ rtl8127_init_one(struct pci_dev *pdev, rtl8127_sysfs_init(dev); #endif /* ENABLE_R8127_SYSFS */ - printk("%s", GPL_CLAIM); + printk(KERN_INFO "%s", GPL_CLAIM); out: return rc; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 10d68d241ba1f..90a68a59af2e7 100644 --- 
a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1280,6 +1280,8 @@ int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, .roc[1].len = cpu_to_le16(sizeof(struct roc_acquire_tlv)) }; + struct wiphy *wiphy = mvif->phy->mt76->hw->wiphy; + if (!mconf || hweight16(vif->valid_links) < 2 || hweight16(sel_links) != 2) return -EPERM; @@ -1302,7 +1304,8 @@ int mt7925_mcu_set_mlo_roc(struct mt792x_bss_conf *mconf, u16 sel_links, is_AG_band |= links[i].chan->band == NL80211_BAND_2GHZ; } - if (vif->cfg.eml_cap & IEEE80211_EML_CAP_EMLSR_SUPP) + if (!(wiphy->iftype_ext_capab[0].mld_capa_and_ops & + IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS)) type = is_AG_band ? MT7925_ROC_REQ_MLSR_AG : MT7925_ROC_REQ_MLSR_AA; else diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 6414ec968f99a..2fdd327bf6a88 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -I$(src) - +ccflags-y += -DCONFIG_NVFS obj-$(CONFIG_NVME_CORE) += nvme-core.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o @@ -20,10 +20,11 @@ nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o nvme-core-$(CONFIG_NVME_HOST_AUTH) += auth.o nvme-y += pci.o - +nvme-y += nvfs-dma.o nvme-fabrics-y += fabrics.o nvme-rdma-y += rdma.o +nvme-rdma-y += nvfs-rdma.o nvme-fc-y += fc.o diff --git a/drivers/nvme/host/nvfs-dma.c b/drivers/nvme/host/nvfs-dma.c new file mode 100644 index 0000000000000..33a27c3aeca90 --- /dev/null +++ b/drivers/nvme/host/nvfs-dma.c @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifdef CONFIG_NVFS +#define NVFS_USE_DMA_ITER_API +#define MODULE_PREFIX nvme_v2 +#include "nvfs.h" + +struct nvfs_dma_rw_blk_iter_ops *nvfs_ops = NULL; + +atomic_t nvfs_shutdown = ATOMIC_INIT(1); + +DEFINE_PER_CPU(long, nvfs_n_ops); + +#define NVIDIA_FS_COMPAT_FT(ops) \ + (NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_START(ops) && NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_NEXT(ops)) + +// protected via nvfs_module_mutex +int REGISTER_FUNC(struct nvfs_dma_rw_blk_iter_ops *ops) +{ + if (NVIDIA_FS_COMPAT_FT(ops)) { + nvfs_ops = ops; + atomic_set(&nvfs_shutdown, 0); + return 0; + } else + return -EOPNOTSUPP; + +} +EXPORT_SYMBOL_GPL(REGISTER_FUNC); + +// protected via nvfs_module_mutex +void UNREGISTER_FUNC(void) +{ + (void) atomic_cmpxchg(&nvfs_shutdown, 0, 1); + do { + msleep(NVFS_HOLD_TIME_MS); + } while(nvfs_count_ops()); + nvfs_ops = NULL; +} +EXPORT_SYMBOL_GPL(UNREGISTER_FUNC); +#endif diff --git a/drivers/nvme/host/nvfs-dma.h b/drivers/nvme/host/nvfs-dma.h new file mode 100644 index 0000000000000..7876bb7a4a1b7 --- /dev/null +++ b/drivers/nvme/host/nvfs-dma.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef NVFS_DMA_H +#define NVFS_DMA_H + +/* Forward declarations for functions from pci.c that we need */ +static blk_status_t nvme_pci_setup_data_prp(struct request *req, + struct blk_dma_iter *iter); +static blk_status_t nvme_pci_setup_data_sgl(struct request *req, + struct blk_dma_iter *iter); +static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq, + struct nvme_iod *iod); +static inline dma_addr_t nvme_pci_first_desc_dma_addr(struct nvme_command *cmd); + +static inline bool nvme_nvfs_unmap_sgls(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct device *dma_dev = nvmeq->dev->dev; + dma_addr_t sqe_dma_addr = le64_to_cpu(iod->cmd.common.dptr.sgl.addr); + unsigned int sqe_dma_len = le32_to_cpu(iod->cmd.common.dptr.sgl.length); + struct nvme_sgl_desc *sg_list = iod->descriptors[0]; + enum dma_data_direction dir = rq_dma_dir(req); + + if (iod->nr_descriptors) { + unsigned int nr_entries = sqe_dma_len / sizeof(*sg_list), i; + + for (i = 0; i < nr_entries; i++) { + nvfs_ops->nvfs_dma_unmap_page(dma_dev, + iod->nvfs_cookie, + le64_to_cpu(sg_list[i].addr), + le32_to_cpu(sg_list[i].length), + dir); + } + } else + nvfs_ops->nvfs_dma_unmap_page(dma_dev, iod->nvfs_cookie, sqe_dma_addr, sqe_dma_len, dir); + + + + return true; +} + +static inline bool nvme_nvfs_unmap_prps(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct device *dma_dev = nvmeq->dev->dev; + enum dma_data_direction dma_dir = rq_dma_dir(req); + unsigned int i; + + /* Check if dma_vecs was allocated - if setup failed early, it might be NULL */ + if (!iod->dma_vecs) + return true; + + /* Unmap all DMA vectors - pass page pointer from dma_vecs */ + for (i = 0; i < iod->nr_dma_vecs; i++) { + nvfs_ops->nvfs_dma_unmap_page(dma_dev, + iod->nvfs_cookie, + iod->dma_vecs[i].addr, + iod->dma_vecs[i].len, + dma_dir); + } + + /* Free the dma_vecs mempool allocation */ + mempool_free(iod->dma_vecs, nvmeq->dev->dmavec_mempool); + iod->dma_vecs = NULL; + iod->nr_dma_vecs = 0; + + return true; +} + +static inline void nvme_nvfs_free_descriptors(struct request *req) +{ + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = nvme_pci_first_desc_dma_addr(&iod->cmd); + int i; + + if (iod->nr_descriptors == 1) { + dma_pool_free(nvme_dma_pool(nvmeq, iod), iod->descriptors[0], + dma_addr); + return; + } + + for (i = 0; i < iod->nr_descriptors; i++) { + __le64 *prp_list = iod->descriptors[i]; + dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); + + dma_pool_free(nvmeq->descriptor_pools.large, prp_list, + dma_addr); + dma_addr = next_dma_addr; + } +} + +static inline bool nvme_nvfs_unmap_data(struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + bool ret; + + /* Check if this was an NVFS I/O by checking the IOD_NVFS_IO flag */ + if (!(iod->flags & 
IOD_NVFS_IO)) + return false; + + /* Clear the NVFS flag */ + iod->flags &= ~IOD_NVFS_IO; + + /* Call appropriate unmap function based on command type */ + if (nvme_pci_cmd_use_sgl(&iod->cmd)) + ret = nvme_nvfs_unmap_sgls(req); + else + ret = nvme_nvfs_unmap_prps(req); + + if (iod->nr_descriptors) + nvme_nvfs_free_descriptors(req); + + nvfs_put_ops(); + return ret; +} + +static inline blk_status_t nvme_nvfs_map_data(struct request *req, + bool *is_nvfs_io) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_queue *nvmeq = req->mq_hctx->driver_data; + struct nvme_dev *dev = nvmeq->dev; + struct device *dma_dev = nvmeq->dev->dev; + enum nvme_use_sgl use_sgl = nvme_pci_use_sgls(dev, req); + struct blk_dma_iter iter; + blk_status_t ret = BLK_STS_RESOURCE; + + *is_nvfs_io = false; + + /* Check integrity and try to get nvfs_ops */ + if (blk_integrity_rq(req) || !nvfs_get_ops()) { + return ret; + } + + /* Initialize total_len for this request */ + iod->total_len = 0; + + if (!nvfs_ops->nvfs_blk_rq_dma_map_iter_start(req, dma_dev, + &iod->dma_state, &iter, &iod->nvfs_cookie)) { + nvfs_put_ops(); + ret = BLK_STS_IOERR; + return ret; + } + + /* NVFS can handle this request, set the flag */ + *is_nvfs_io = true; + iod->flags |= IOD_NVFS_IO; + + if (use_sgl == SGL_FORCED || + (use_sgl == SGL_SUPPORTED && + (sgl_threshold && nvme_pci_avg_seg_size(req) >= sgl_threshold))) + ret = nvme_pci_setup_data_sgl(req, &iter); + else + ret = nvme_pci_setup_data_prp(req, &iter); + + /* If setup failed, cleanup: unmap DMA, clear flag, release ops */ + if (ret != BLK_STS_OK) { + nvme_nvfs_unmap_data(req); + } + + return ret; +} + +#endif /* NVFS_DMA_H */ diff --git a/drivers/nvme/host/nvfs-rdma.c b/drivers/nvme/host/nvfs-rdma.c new file mode 100644 index 0000000000000..4b06e45883539 --- /dev/null +++ b/drivers/nvme/host/nvfs-rdma.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifdef CONFIG_NVFS +#define MODULE_PREFIX nvme_rdma_v1 +#include "nvfs.h" + +struct nvfs_dma_rw_ops *nvfs_ops; + +atomic_t nvfs_shutdown = ATOMIC_INIT(1); + +DEFINE_PER_CPU(long, nvfs_n_ops); + +// must have for compatability +#define NVIDIA_FS_COMPAT_FT(ops) \ + (NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops)) + +// protected via nvfs_module_mutex +int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops) +{ + if (NVIDIA_FS_COMPAT_FT(ops)) { + nvfs_ops = ops; + atomic_set(&nvfs_shutdown, 0); + return 0; + } else + return -EOPNOTSUPP; + +} +EXPORT_SYMBOL_GPL(REGISTER_FUNC); + +// protected via nvfs_module_mutex +void UNREGISTER_FUNC(void) +{ + (void) atomic_cmpxchg(&nvfs_shutdown, 0, 1); + do { + msleep(NVFS_HOLD_TIME_MS); + } while(nvfs_count_ops()); + nvfs_ops = NULL; +} +EXPORT_SYMBOL_GPL(UNREGISTER_FUNC); +#endif diff --git a/drivers/nvme/host/nvfs-rdma.h b/drivers/nvme/host/nvfs-rdma.h new file mode 100644 index 0000000000000..f9721ac9ead1e --- /dev/null +++ b/drivers/nvme/host/nvfs-rdma.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef NVFS_RDMA_H +#define NVFS_RDMA_H + +static bool nvme_rdma_nvfs_unmap_data(struct ib_device *ibdev, + struct request *rq) + +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + enum dma_data_direction dma_dir = rq_dma_dir(rq); + int count; + + if (!blk_integrity_rq(rq) && nvfs_ops != NULL) { + count = nvfs_ops->nvfs_dma_unmap_sg(ibdev->dma_device, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + dma_dir); + if (count) { + nvfs_put_ops(); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); + return true; + } + } + return false; +} + +static int nvme_rdma_nvfs_map_data(struct ib_device *ibdev, struct request *rq, bool *is_nvfs_io, int* count) +{ + struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + enum dma_data_direction dma_dir = rq_dma_dir(rq); + int ret = 0; + + *is_nvfs_io = false; + *count = 0; + if (!blk_integrity_rq(rq) && nvfs_get_ops()) { + + // associates bio pages to scatterlist + *count = nvfs_ops->nvfs_blk_rq_map_sg(rq->q, rq , req->data_sgl.sg_table.sgl); + if (!*count) { + nvfs_put_ops(); + return 0; // fall to cpu path + } + + *is_nvfs_io = true; + if (unlikely((*count == NVFS_IO_ERR))) { + nvfs_put_ops(); + pr_err("%s: failed to map sg_nents=:%d\n", __func__, req->data_sgl.nents); + return -EIO; + } + req->data_sgl.nents = *count; + + *count = nvfs_ops->nvfs_dma_map_sg_attrs(ibdev->dma_device, + req->data_sgl.sg_table.sgl, + req->data_sgl.nents, + dma_dir, + DMA_ATTR_NO_WARN); + + if (unlikely((*count == NVFS_IO_ERR))) { + nvfs_put_ops(); + return -EIO; + } + + if (unlikely(*count == NVFS_CPU_REQ)) { + nvfs_put_ops(); + return -EIO; + } + + return ret; + } else { + // Fall to CPU path + return 0; + } + + return ret; +} + +#endif diff --git a/drivers/nvme/host/nvfs.h b/drivers/nvme/host/nvfs.h new file mode 100644 index 0000000000000..0eb51b94b8d2e --- /dev/null +++ b/drivers/nvme/host/nvfs.h @@ -0,0 +1,156 @@ +/* 
SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef NVFS_H +#define NVFS_H + +#include +#include +#include +#include +#include +#include +#include + +/* Forward declarations */ +struct blk_dma_iter; +struct dma_iova_state; + +#define REGSTR2(x) x##_register_nvfs_dma_ops +#define REGSTR(x) REGSTR2(x) + +#define UNREGSTR2(x) x##_unregister_nvfs_dma_ops +#define UNREGSTR(x) UNREGSTR2(x) + +#define REGISTER_FUNC REGSTR(MODULE_PREFIX) +#define UNREGISTER_FUNC UNREGSTR(MODULE_PREFIX) + +#define NVFS_IO_ERR -1 +#define NVFS_CPU_REQ -2 + +#define NVFS_HOLD_TIME_MS 1000 + +#ifdef NVFS_USE_DMA_ITER_API +extern struct nvfs_dma_rw_blk_iter_ops *nvfs_ops; +#else +extern struct nvfs_dma_rw_ops *nvfs_ops; +#endif + +extern atomic_t nvfs_shutdown; + +DECLARE_PER_CPU(long, nvfs_n_ops); + +static inline long nvfs_count_ops(void) +{ + int i; + long sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nvfs_n_ops, i); + return sum; +} + +static inline bool nvfs_get_ops(void) +{ + if (nvfs_ops && !atomic_read(&nvfs_shutdown)) { + this_cpu_inc(nvfs_n_ops); + return true; + } + return false; +} + +static inline void nvfs_put_ops(void) +{ + this_cpu_dec(nvfs_n_ops); +} + + +struct nvfs_dma_rw_blk_iter_ops { + unsigned long long ft_bmap; // feature bitmap + + int (*nvfs_blk_rq_dma_map_iter_start) (struct request *req, + struct device *dma_dev, + struct dma_iova_state *state, + struct blk_dma_iter *iter, + void **cookie); + + int (*nvfs_blk_rq_dma_map_iter_next) (struct request *req, + struct device *dma_dev, + struct dma_iova_state *state, + struct blk_dma_iter *iter); + + int (*nvfs_dma_unmap_page) (struct device *device, + void* cookie, + dma_addr_t addr, + size_t size, + enum dma_data_direction dir); + + bool (*nvfs_is_gpu_page) (struct page *page); + + unsigned int (*nvfs_gpu_index) (struct page *page); + + unsigned int (*nvfs_device_priority) (struct device *dev, unsigned int gpu_index); + +}; + +struct nvfs_dma_rw_ops { + unsigned long long ft_bmap; // feature bitmap + + int (*nvfs_blk_rq_map_sg) (struct request_queue *q, + struct request *req, + struct scatterlist *sglist); + + int (*nvfs_dma_map_sg_attrs) (struct device *device, + struct scatterlist *sglist, + int nents, + enum dma_data_direction dma_dir, + unsigned long attrs); + + int (*nvfs_dma_unmap_sg) (struct device *device, + struct scatterlist *sglist, + int nents, + enum dma_data_direction dma_dir); + + bool (*nvfs_is_gpu_page) (struct page *page); + + unsigned int (*nvfs_gpu_index) (struct page *page); + + unsigned int (*nvfs_device_priority) (struct device *dev, unsigned int gpu_index); +}; + +// feature list for dma_ops, values indicate bit pos +enum ft_bits { + nvfs_ft_prep_sglist = 1ULL << 0, + nvfs_ft_map_sglist = 1ULL << 1, + nvfs_ft_is_gpu_page = 1ULL << 2, + nvfs_ft_device_priority = 1ULL << 3, + nvfs_ft_blk_dma_map_iter_start = 1ULL << 5, + nvfs_ft_blk_dma_map_iter_next = 1ULL << 6, +}; + +// check features for use in registration with vendor drivers +#define NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) ((ops)->ft_bmap 
& nvfs_ft_prep_sglist) +#define NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops) ((ops)->ft_bmap & nvfs_ft_map_sglist) +#define NVIDIA_FS_CHECK_FT_GPU_PAGE(ops) ((ops)->ft_bmap & nvfs_ft_is_gpu_page) +#define NVIDIA_FS_CHECK_FT_DEVICE_PRIORITY(ops) ((ops)->ft_bmap & nvfs_ft_device_priority) +#define NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_START(ops) ((ops)->ft_bmap & nvfs_ft_blk_dma_map_iter_start) +#define NVIDIA_FS_CHECK_FT_BLK_DMA_MAP_ITER_NEXT(ops) ((ops)->ft_bmap & nvfs_ft_blk_dma_map_iter_next) + +#ifdef NVFS_USE_DMA_ITER_API +int REGISTER_FUNC(struct nvfs_dma_rw_blk_iter_ops *ops); +#else +int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops); +#endif + +void UNREGISTER_FUNC(void); + +#endif /* NVFS_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8ed5f1941f05c..7e17c3f57d3eb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -30,6 +30,10 @@ #include "trace.h" #include "nvme.h" +#ifdef CONFIG_NVFS +#define NVFS_USE_DMA_ITER_API +#include "nvfs.h" +#endif #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) @@ -261,6 +265,11 @@ enum nvme_iod_flags { /* single segment dma mapping */ IOD_SINGLE_SEGMENT = 1U << 2, + +#ifdef CONFIG_NVFS + /* NVFS GPU Direct Storage I/O */ + IOD_NVFS_IO = 1U << 3, +#endif }; struct nvme_dma_vec { @@ -286,6 +295,9 @@ struct nvme_iod { dma_addr_t meta_dma; struct sg_table meta_sgt; struct nvme_sgl_desc *meta_descriptor; +#ifdef CONFIG_NVFS + void *nvfs_cookie; +#endif }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -711,12 +723,22 @@ static void nvme_free_sgls(struct request *req) } } +#ifdef CONFIG_NVFS +#include "nvfs-dma.h" +#endif + static void nvme_unmap_data(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct device *dma_dev = nvmeq->dev->dev; +#ifdef CONFIG_NVFS + /* Check if this was an NVFS I/O and handle unmapping */ + if (nvme_nvfs_unmap_data(req)) + return; +#endif + if (iod->flags & IOD_SINGLE_SEGMENT) { static_assert(offsetof(union nvme_data_ptr, prp1) == offsetof(union nvme_data_ptr, sgl.addr)); @@ -743,6 +765,21 @@ static bool nvme_pci_prp_iter_next(struct request *req, struct device *dma_dev, if (iter->len) return true; + +#ifdef CONFIG_NVFS + if (iod->flags & IOD_NVFS_IO) { + if (!nvfs_ops->nvfs_blk_rq_dma_map_iter_next(req, dma_dev, + &iod->dma_state, iter)) + return false; + + iod->dma_vecs[iod->nr_dma_vecs].addr = iter->addr; + iod->dma_vecs[iod->nr_dma_vecs].len = iter->len; + iod->nr_dma_vecs++; + + return true; + } +#endif + if (!blk_rq_dma_map_iter_next(req, dma_dev, &iod->dma_state, iter)) return false; if (!dma_use_iova(&iod->dma_state) && dma_need_unmap(dma_dev)) { @@ -763,7 +800,11 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req, unsigned int prp_len, i; __le64 *prp_list; - if (!dma_use_iova(&iod->dma_state) && dma_need_unmap(nvmeq->dev->dev)) { + if ( +#ifdef CONFIG_NVFS + (iod->flags & IOD_NVFS_IO) || +#endif + (!dma_use_iova(&iod->dma_state) && dma_need_unmap(nvmeq->dev->dev))) { iod->dma_vecs = mempool_alloc(nvmeq->dev->dmavec_mempool, GFP_ATOMIC); if (!iod->dma_vecs) @@ -868,6 +909,11 @@ static blk_status_t nvme_pci_setup_data_prp(struct request *req, */ iod->cmd.common.dptr.prp1 = cpu_to_le64(prp1_dma); iod->cmd.common.dptr.prp2 = cpu_to_le64(prp2_dma); +#ifdef CONFIG_NVFS + /* For NVFS, don't call nvme_unmap_data - cleanup happens in nvme_nvfs_unmap_data */ + if (iod->flags & IOD_NVFS_IO) + return iter->status; +#endif if 
(unlikely(iter->status)) nvme_unmap_data(req); return iter->status; @@ -908,10 +954,15 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req, /* set the transfer type as SGL */ iod->cmd.common.flags = NVME_CMD_SGL_METABUF; - if (entries == 1 || blk_rq_dma_map_coalesce(&iod->dma_state)) { - nvme_pci_sgl_set_data(&iod->cmd.common.dptr.sgl, iter); - iod->total_len += iter->len; - return BLK_STS_OK; +#ifdef CONFIG_NVFS + if (!(iod->flags & IOD_NVFS_IO)) +#endif + { + if (entries == 1 || blk_rq_dma_map_coalesce(&iod->dma_state)) { + nvme_pci_sgl_set_data(&iod->cmd.common.dptr.sgl, iter); + iod->total_len += iter->len; + return BLK_STS_OK; + } } if (entries <= NVME_SMALL_POOL_SIZE / sizeof(*sg_list)) @@ -930,10 +981,21 @@ static blk_status_t nvme_pci_setup_data_sgl(struct request *req, } nvme_pci_sgl_set_data(&sg_list[mapped++], iter); iod->total_len += iter->len; - } while (blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, &iod->dma_state, + } while ( +#ifdef CONFIG_NVFS + (iod->flags & IOD_NVFS_IO) ? + nvfs_ops->nvfs_blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, + &iod->dma_state, iter) : +#endif + blk_rq_dma_map_iter_next(req, nvmeq->dev->dev, &iod->dma_state, iter)); nvme_pci_sgl_set_seg(&iod->cmd.common.dptr.sgl, sgl_dma, mapped); +#ifdef CONFIG_NVFS + /* For NVFS, don't call nvme_unmap_data - cleanup happens in nvme_nvfs_unmap_data */ + if (iod->flags & IOD_NVFS_IO) + return iter->status; +#endif if (unlikely(iter->status)) nvme_unmap_data(req); return iter->status; @@ -987,6 +1049,12 @@ static blk_status_t nvme_map_data(struct request *req) struct blk_dma_iter iter; blk_status_t ret; +#ifdef CONFIG_NVFS + bool is_nvfs_io = false; + ret = nvme_nvfs_map_data(req, &is_nvfs_io); + if (is_nvfs_io) + return ret; +#endif /* * Try to skip the DMA iterator for single segment requests, as that * significantly improves performances for small I/O sizes. 
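
The PCI path above only consumes the iterator-based ops; the provider side is expected to live in an external GPU Direct Storage module. A minimal registration sketch follows: the hook names (REGISTER_FUNC/UNREGISTER_FUNC expand to nvme_v2_register_nvfs_dma_ops()/nvme_v2_unregister_nvfs_dma_ops() given MODULE_PREFIX nvme_v2), the struct layout and the two feature bits checked by NVIDIA_FS_COMPAT_FT() are taken from nvfs.h/nvfs-dma.c above. The demo_* callbacks are non-functional placeholders, and the sketch assumes the nvfs.h definitions are visible to the module.

	/*
	 * Sketch only: a hypothetical out-of-tree provider registering the
	 * iterator-based DMA ops consumed by the NVMe PCI path above.
	 */
	#include <linux/module.h>

	#define NVFS_USE_DMA_ITER_API
	#define MODULE_PREFIX nvme_v2
	#include "nvfs.h"	/* assumption: struct/prototype definitions visible */

	static int demo_iter_start(struct request *req, struct device *dma_dev,
				   struct dma_iova_state *state,
				   struct blk_dma_iter *iter, void **cookie)
	{
		/* A real provider maps the first GPU segment and hands back a cookie. */
		*cookie = NULL;
		return 0;
	}

	static int demo_iter_next(struct request *req, struct device *dma_dev,
				  struct dma_iova_state *state,
				  struct blk_dma_iter *iter)
	{
		/* A real provider advances to the next mapped GPU segment. */
		return 0;
	}

	static int demo_unmap_page(struct device *dev, void *cookie, dma_addr_t addr,
				   size_t size, enum dma_data_direction dir)
	{
		/* A real provider undoes the GPU-side mapping here. */
		return 0;
	}

	static struct nvfs_dma_rw_blk_iter_ops demo_ops = {
		/* Both bits are required by NVIDIA_FS_COMPAT_FT() in nvfs-dma.c. */
		.ft_bmap = nvfs_ft_blk_dma_map_iter_start |
			   nvfs_ft_blk_dma_map_iter_next,
		.nvfs_blk_rq_dma_map_iter_start	= demo_iter_start,
		.nvfs_blk_rq_dma_map_iter_next	= demo_iter_next,
		.nvfs_dma_unmap_page		= demo_unmap_page,
	};

	static int __init demo_init(void)
	{
		/* REGISTER_FUNC with MODULE_PREFIX nvme_v2 expands to this symbol. */
		return nvme_v2_register_nvfs_dma_ops(&demo_ops);
	}

	static void __exit demo_exit(void)
	{
		nvme_v2_unregister_nvfs_dma_ops();
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
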
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 190a4cfa8a5ee..c2bebd7cebec4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -27,6 +27,9 @@ #include "nvme.h" #include "fabrics.h" +#ifdef CONFIG_NVFS +#include "nvfs.h" +#endif #define NVME_RDMA_CM_TIMEOUT_MS 3000 /* 3 second */ @@ -1212,6 +1215,9 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, return ib_post_send(queue->qp, &wr, NULL); } +#ifdef CONFIG_NVFS +#include "nvfs-rdma.h" +#endif static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); @@ -1223,6 +1229,11 @@ static void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq) NVME_INLINE_METADATA_SG_CNT); } +#ifdef CONFIG_NVFS + if (nvme_rdma_nvfs_unmap_data(ibdev, rq)) + return; +#endif + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, rq_dma_dir(rq)); sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); @@ -1476,6 +1487,17 @@ static int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq, if (ret) return -ENOMEM; +#ifdef CONFIG_NVFS + { + bool is_nvfs_io = false; + ret = nvme_rdma_nvfs_map_data(ibdev, rq, &is_nvfs_io, count); + if (is_nvfs_io) { + if (ret) + goto out_free_table; + return 0; + } + } +#endif req->data_sgl.nents = blk_rq_map_sg(rq, req->data_sgl.sg_table.sgl); *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 372de7961d2a6..9a6943688e6db 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -30,6 +30,7 @@ #include #include #include +#include "../cxl/cxlpci.h" #include "pci.h" DEFINE_MUTEX(pci_slot_mutex); @@ -5133,6 +5134,151 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) return rc; } +static int cxl_reset_prepare(struct pci_dev *dev, u16 dvsec) +{ + u32 timeout_us = 100, timeout_tot_us = 10000; + u16 reg, cap; + int rc; + + if (!pci_wait_for_pending_transaction(dev)) + pci_err(dev, "timed out waiting for pending transaction; performing cxl reset anyway\n"); + + /* Check if the device is cache capable. */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_CACHE_CAPABLE)) + return 0; + + /* Disable cache. WB and invalidate cache if capability is advertised */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); + if (rc) + return rc; + reg |= CXL_DVSEC_DISABLE_CACHING; + /* + * DEVCTL2 bits are written only once. So check WB+I capability while + * keeping disable caching set. + */ + if (cap & CXL_DVSEC_CACHE_WBI_CAPABLE) + reg |= CXL_DVSEC_INIT_CACHE_WBI; + pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); + + /* + * From Section 9.6: "Software may leverage the cache size reported in + * the DVSEC CXL Capability2 register to compute a suitable timeout + * value". + * Given there is no conversion factor for cache size -> timeout, + * setting timer for default 10ms. + */ + do { + if (timeout_tot_us == 0) + return -ETIMEDOUT; + usleep_range(timeout_us, timeout_us + 1); + timeout_tot_us -= timeout_us; + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, + ®); + if (rc) + return rc; + } while (!(reg & CXL_DVSEC_CACHE_INVALID)); + + return 0; +} + +static int cxl_reset_init(struct pci_dev *dev, u16 dvsec) +{ + /* + * Timeout values ref CXL Spec v3.2 Ch 8 Control and Status Registers, + * under section 8.1.3.1 DVSEC CXL Capability. 
+ */ + u32 reset_timeouts_ms[] = { 10, 100, 1000, 10000, 100000 }; + u16 reg; + u32 timeout_ms; + int rc, ind; + + /* Check if CXL Reset MEM CLR is supported. */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); + if (rc) + return rc; + + if (reg & CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, + ®); + if (rc) + return rc; + + reg |= CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE; + pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); + } + + /* Read timeout value. */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); + if (rc) + return rc; + ind = FIELD_GET(CXL_DVSEC_CXL_RST_TIMEOUT_MASK, reg); + timeout_ms = reset_timeouts_ms[ind]; + + /* Write reset config. */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); + if (rc) + return rc; + + reg |= CXL_DVSEC_INIT_CXL_RESET; + pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); + + /* Wait till timeout and then check reset status is complete. */ + msleep(timeout_ms); + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_STATUS2_OFFSET, ®); + if (rc) + return rc; + if (reg & CXL_DVSEC_CXL_RESET_ERR || + ~reg & CXL_DVSEC_CXL_RST_COMPLETE) + return -ETIMEDOUT; + + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); + if (rc) + return rc; + reg &= (~CXL_DVSEC_DISABLE_CACHING); + pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); + + return 0; +} + +/** + * cxl_reset - initiate a cxl reset + * @dev: device to reset + * @probe: if true, return 0 if device can be reset this way + * + * Initiate a cxl reset on @dev. + */ +static int cxl_reset(struct pci_dev *dev, bool probe) +{ + u16 dvsec, reg; + int rc; + + dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PCIE_DEVICE); + if (!dvsec) + return -ENOTTY; + + /* Check if CXL Reset is supported. */ + rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); + if (rc) + return -ENOTTY; + + if ((reg & CXL_DVSEC_CXL_RST_CAPABLE) == 0) + return -ENOTTY; + + if (probe) + return 0; + + rc = cxl_reset_prepare(dev, dvsec); + if (rc) + return rc; + + return cxl_reset_init(dev, dvsec); +} + void pci_dev_lock(struct pci_dev *dev) { /* block PM suspend, driver probe, etc. */ @@ -5219,6 +5365,7 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = { { pci_dev_acpi_reset, .name = "acpi" }, { pcie_reset_flr, .name = "flr" }, { pci_af_flr, .name = "af_flr" }, + { cxl_reset, .name = "cxl_reset" }, { pci_pm_reset, .name = "pm" }, { pci_reset_bus_function, .name = "bus" }, { cxl_reset_bus_function, .name = "cxl_bus" }, diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 23245352a3fc0..fa2a5867659f2 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2547,6 +2547,7 @@ static int arm_cmn_probe(struct platform_device *pdev) struct arm_cmn *cmn; const char *name; static atomic_t id; + struct resource *cfg; int err, rootnode, this_id; cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); @@ -2562,7 +2563,16 @@ static int arm_cmn_probe(struct platform_device *pdev) rootnode = arm_cmn600_acpi_probe(pdev, cmn); } else { rootnode = 0; - cmn->base = devm_platform_ioremap_resource(pdev, 0); + + /* + * Avoid requesting resources as the PMUs registers are + * scattered through CMN, and may appear either side of + * registers for other 'devices'. (e.g. the MPAM MSC controls). 
+ */ + cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!cfg) + return -EINVAL; + cmn->base = devm_ioremap(&pdev->dev, cfg->start, resource_size(cfg)); if (IS_ERR(cmn->base)) return PTR_ERR(cmn->base); if (cmn->part == PART_CMN600) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index efa9b229e7012..33ad2cab5c160 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -322,14 +322,14 @@ static struct arm_cspmu_impl_match impl_match[] = { { .module_name = "nvidia_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, { .module_name = "ampere_cspmu", .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, - .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .pmiidr_mask = PMIIDR_IMPLEMENTER, .module = NULL, .impl_init_ops = NULL, }, @@ -351,6 +351,44 @@ static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) return NULL; } +static u32 arm_cspmu_get_pmiidr(struct arm_cspmu *cspmu) +{ + u32 pmiidr, pmpidr; + + pmiidr = readl(cspmu->base0 + PMIIDR); + + if (pmiidr != 0) + return pmiidr; + + /* Construct PMIIDR value from PMPIDRs. */ + + pmpidr = readl(cspmu->base0 + PMPIDR0); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_0, + FIELD_GET(PMPIDR0_PART_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR1); + pmiidr |= FIELD_PREP(PMIIDR_PRODUCTID_PART_1, + FIELD_GET(PMPIDR1_PART_1, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_0, + FIELD_GET(PMPIDR1_DES_0, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR2); + pmiidr |= FIELD_PREP(PMIIDR_VARIANT, + FIELD_GET(PMPIDR2_REVISION, pmpidr)); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_1, + FIELD_GET(PMPIDR2_DES_1, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR3); + pmiidr |= FIELD_PREP(PMIIDR_REVISION, + FIELD_GET(PMPIDR3_REVAND, pmpidr)); + + pmpidr = readl(cspmu->base0 + PMPIDR4); + pmiidr |= FIELD_PREP(PMIIDR_IMPLEMENTER_DES_2, + FIELD_GET(PMPIDR4_DES_2, pmpidr)); + + return pmiidr; +} + #define DEFAULT_IMPL_OP(name) .name = arm_cspmu_##name static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) @@ -361,7 +399,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) /* Start with a default PMU implementation */ cspmu->impl.module = THIS_MODULE; - cspmu->impl.pmiidr = readl(cspmu->base0 + PMIIDR); + cspmu->impl.pmiidr = arm_cspmu_get_pmiidr(cspmu); cspmu->impl.ops = (struct arm_cspmu_impl_ops) { DEFAULT_IMPL_OP(get_event_attrs), DEFAULT_IMPL_OP(get_format_attrs), @@ -815,6 +853,10 @@ static void arm_cspmu_stop(struct perf_event *event, int pmu_flags) return; arm_cspmu_disable_counter(cspmu, hwc->idx); + + if (cspmu->impl.ops.reset_ev_filter) + cspmu->impl.ops.reset_ev_filter(cspmu, event); + arm_cspmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 19684b76bd969..cd65a58dbd884 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -86,6 +86,11 @@ #define PMCFGR 0xE00 #define PMCR 0xE04 #define PMIIDR 0xE08 +#define PMPIDR0 0xFE0 +#define PMPIDR1 0xFE4 +#define PMPIDR2 0xFE8 +#define PMPIDR3 0xFEC +#define PMPIDR4 0xFD0 /* PMCFGR register field */ #define PMCFGR_NCG GENMASK(31, 28) @@ -115,8 +120,34 @@ #define PMCR_E BIT(0) /* PMIIDR register field */ -#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0) -#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20) +#define 
PMIIDR_IMPLEMENTER GENMASK(11, 0) +#define PMIIDR_IMPLEMENTER_DES_0 GENMASK(3, 0) +#define PMIIDR_IMPLEMENTER_DES_1 GENMASK(6, 4) +#define PMIIDR_IMPLEMENTER_DES_2 GENMASK(11, 8) +#define PMIIDR_REVISION GENMASK(15, 12) +#define PMIIDR_VARIANT GENMASK(19, 16) +#define PMIIDR_PRODUCTID GENMASK(31, 20) +#define PMIIDR_PRODUCTID_PART_0 GENMASK(27, 20) +#define PMIIDR_PRODUCTID_PART_1 GENMASK(31, 28) + +/* PMPIDR0 register field */ +#define PMPIDR0_PART_0 GENMASK(7, 0) + +/* PMPIDR1 register field */ +#define PMPIDR1_DES_0 GENMASK(7, 4) +#define PMPIDR1_PART_1 GENMASK(3, 0) + +/* PMPIDR2 register field */ +#define PMPIDR2_REVISION GENMASK(7, 4) +#define PMPIDR2_DES_1 GENMASK(2, 0) + +/* PMPIDR3 register field */ +#define PMPIDR3_REVAND GENMASK(7, 4) +#define PMPIDR3_CMOD GENMASK(3, 0) + +/* PMPIDR4 register field */ +#define PMPIDR4_SIZE GENMASK(7, 4) +#define PMPIDR4_DES_2 GENMASK(3, 0) /* JEDEC-assigned JEP106 identification code */ #define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B @@ -152,11 +183,13 @@ struct arm_cspmu_impl_ops { bool (*is_cycle_counter_event)(const struct perf_event *event); /* Decode event type/id from configs */ u32 (*event_type)(const struct perf_event *event); - /* Set event filters */ + /* Set/reset event filters */ void (*set_cc_filter)(struct arm_cspmu *cspmu, const struct perf_event *event); void (*set_ev_filter)(struct arm_cspmu *cspmu, const struct perf_event *event); + void (*reset_ev_filter)(struct arm_cspmu *cspmu, + const struct perf_event *event); /* Implementation specific event validation */ int (*validate_event)(struct arm_cspmu *cspmu, struct perf_event *event); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index dc6d4e3e2a1ba..e06a06d3407b1 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -23,7 +23,7 @@ #define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0) -#define NV_PRODID_MASK GENMASK(31, 0) +#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION) #define NV_FORMAT_NAME_GENERIC 0 @@ -40,10 +40,21 @@ struct nv_cspmu_ctx { const char *name; - u32 filter_mask; - u32 filter_default_val; + struct attribute **event_attr; struct attribute **format_attr; + + u32 filter_mask; + u32 filter_default_val; + u32 filter2_mask; + u32 filter2_default_val; + + u32 (*get_filter)(const struct perf_event *event); + u32 (*get_filter2)(const struct perf_event *event); + + void *data; + + int (*init_data)(struct arm_cspmu *cspmu); }; static struct attribute *scf_pmu_event_attrs[] = { @@ -144,6 +155,7 @@ static struct attribute *cnvlink_pmu_format_attrs[] = { static struct attribute *generic_pmu_format_attrs[] = { ARM_CSPMU_FORMAT_EVENT_ATTR, ARM_CSPMU_FORMAT_FILTER_ATTR, + ARM_CSPMU_FORMAT_FILTER2_ATTR, NULL, }; @@ -184,13 +196,36 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event) return filter_val; } +static u32 nv_cspmu_event_filter2(const struct perf_event *event) +{ + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + + const u32 filter_val = event->attr.config2 & ctx->filter2_mask; + + if (filter_val == 0) + return ctx->filter2_default_val; + + return filter_val; +} + static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu, const struct perf_event *event) { - u32 filter = nv_cspmu_event_filter(event); - u32 offset = PMEVFILTR + (4 * event->hw.idx); + u32 filter, offset; + const struct nv_cspmu_ctx *ctx = + to_nv_cspmu_ctx(to_arm_cspmu(event->pmu)); + offset = 4 * event->hw.idx; + + if (ctx->get_filter) { + filter = 
ctx->get_filter(event); + writel(filter, cspmu->base0 + PMEVFILTR + offset); + } - writel(filter, cspmu->base0 + offset); + if (ctx->get_filter2) { + filter = ctx->get_filter2(event); + writel(filter, cspmu->base0 + PMEVFILT2R + offset); + } } static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu, @@ -210,74 +245,120 @@ enum nv_cspmu_name_fmt { struct nv_cspmu_match { u32 prodid; u32 prodid_mask; - u64 filter_mask; - u32 filter_default_val; const char *name_pattern; enum nv_cspmu_name_fmt name_fmt; - struct attribute **event_attr; - struct attribute **format_attr; + struct nv_cspmu_ctx template_ctx; + struct arm_cspmu_impl_ops ops; }; static const struct nv_cspmu_match nv_cspmu_match[] = { { - .prodid = 0x103, + .prodid = 0x10300000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_PCIE_FILTER_ID_MASK, - .filter_default_val = NV_PCIE_FILTER_ID_MASK, .name_pattern = "nvidia_pcie_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = pcie_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = pcie_pmu_format_attrs, + .filter_mask = NV_PCIE_FILTER_ID_MASK, + .filter_default_val = NV_PCIE_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x104, + .prodid = 0x10400000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c1_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x105, + .prodid = 0x10500000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, - .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, .name_pattern = "nvidia_nvlink_c2c0_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = nvlink_c2c_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = nvlink_c2c_pmu_format_attrs, + .filter_mask = NV_NVL_C2C_FILTER_ID_MASK, + .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x106, + .prodid = 0x10600000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = NV_CNVL_FILTER_ID_MASK, - .filter_default_val = NV_CNVL_FILTER_ID_MASK, .name_pattern = "nvidia_cnvlink_pmu_%u", .name_fmt = NAME_FMT_SOCKET, - .event_attr = mcf_pmu_event_attrs, - .format_attr = cnvlink_pmu_format_attrs + .template_ctx = { + .event_attr = mcf_pmu_event_attrs, + .format_attr = cnvlink_pmu_format_attrs, + .filter_mask = NV_CNVL_FILTER_ID_MASK, + .filter_default_val = NV_CNVL_FILTER_ID_MASK, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { - .prodid = 0x2CF, + .prodid = 0x2CF00000, .prodid_mask = NV_PRODID_MASK, - .filter_mask = 0x0, - .filter_default_val = 0x0, .name_pattern = "nvidia_scf_pmu_%u", 
.name_fmt = NAME_FMT_SOCKET, - .event_attr = scf_pmu_event_attrs, - .format_attr = scf_pmu_format_attrs + .template_ctx = { + .event_attr = scf_pmu_event_attrs, + .format_attr = scf_pmu_format_attrs, + .filter_mask = 0x0, + .filter_default_val = 0x0, + .filter2_mask = 0x0, + .filter2_default_val = 0x0, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = NULL, + .data = NULL, + .init_data = NULL + }, }, { .prodid = 0, .prodid_mask = 0, - .filter_mask = NV_GENERIC_FILTER_ID_MASK, - .filter_default_val = NV_GENERIC_FILTER_ID_MASK, .name_pattern = "nvidia_uncore_pmu_%u", .name_fmt = NAME_FMT_GENERIC, - .event_attr = generic_pmu_event_attrs, - .format_attr = generic_pmu_format_attrs + .template_ctx = { + .event_attr = generic_pmu_event_attrs, + .format_attr = generic_pmu_format_attrs, + .filter_mask = NV_GENERIC_FILTER_ID_MASK, + .filter_default_val = NV_GENERIC_FILTER_ID_MASK, + .filter2_mask = NV_GENERIC_FILTER_ID_MASK, + .filter2_default_val = NV_GENERIC_FILTER_ID_MASK, + .get_filter = nv_cspmu_event_filter, + .get_filter2 = nv_cspmu_event_filter2, + .data = NULL, + .init_data = NULL + }, }, }; @@ -310,9 +391,16 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, return name; } +#define SET_OP(name, impl, match, default_op) \ + do { \ + if (match->ops.name) \ + impl->name = match->ops.name; \ + else if (default_op != NULL) \ + impl->name = default_op; \ + } while (false) + static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { - u32 prodid; struct nv_cspmu_ctx *ctx; struct device *dev = cspmu->dev; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; @@ -322,30 +410,30 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) if (!ctx) return -ENOMEM; - prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr); - /* Find matching PMU. */ for (; match->prodid; match++) { const u32 prodid_mask = match->prodid_mask; - if ((match->prodid & prodid_mask) == (prodid & prodid_mask)) + if ((match->prodid & prodid_mask) == + (cspmu->impl.pmiidr & prodid_mask)) break; } - ctx->name = nv_cspmu_format_name(cspmu, match); - ctx->filter_mask = match->filter_mask; - ctx->filter_default_val = match->filter_default_val; - ctx->event_attr = match->event_attr; - ctx->format_attr = match->format_attr; + /* Initialize the context with the matched template. */ + memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx)); + ctx->name = nv_cspmu_format_name(cspmu, match); cspmu->impl.ctx = ctx; /* NVIDIA specific callbacks. */ - impl_ops->set_cc_filter = nv_cspmu_set_cc_filter; - impl_ops->set_ev_filter = nv_cspmu_set_ev_filter; - impl_ops->get_event_attrs = nv_cspmu_get_event_attrs; - impl_ops->get_format_attrs = nv_cspmu_get_format_attrs; - impl_ops->get_name = nv_cspmu_get_name; + SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter); + SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter); + SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs); + SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs); + SET_OP(get_name, impl_ops, match, nv_cspmu_get_name); + + if (ctx->init_data) + return ctx->init_data(cspmu); return 0; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 5c310e803dd78..ae437791b5f8c 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -925,6 +925,12 @@ int armpmu_register(struct arm_pmu *pmu) if (ret) return ret; + /* + * By this stage we know our supported CPUs on either DT/ACPI platforms, + * detect the SMT implementation. 
+ */ + pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus)); + if (!pmu->set_event_filter) pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index f6d7bab5d555c..d1d6000517b2b 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -978,6 +978,42 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; + + if (evtype != ARMV8_PMUV3_PERFCTR_CPU_CYCLES) + return false; + + /* + * A CPU_CYCLES event with threshold counting cannot use PMCCNTR_EL0 + * since it lacks threshold support. + */ + if (armv8pmu_event_get_threshold(&event->attr)) + return false; + + /* + * PMCCNTR_EL0 is not affected by BRBE controls like BRBCR_ELx.FZP. + * So don't use it for branch events. + */ + if (has_branch_stack(event)) + return false; + + /* + * The PMCCNTR_EL0 increments from the processor clock rather than + * the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue + * counting on a WFI PE if one of its SMT sibling is not idle on a + * multi-threaded implementation. So don't use it on SMT cores. + */ + if (cpu_pmu->has_smt) + return false; + + return true; +} + static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { @@ -986,8 +1022,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; /* Always prefer to place a cycle counter into the cycle counter. */ - if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) && - !armv8pmu_event_get_threshold(&event->attr) && !has_branch_stack(event)) { + if (armv8pmu_can_use_pmccntr(cpuc, event)) { if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask)) return ARMV8_PMU_CYCLE_IDX; else if (armv8pmu_event_is_64bit(event) && diff --git a/drivers/platform/arm64/Kconfig b/drivers/platform/arm64/Kconfig index e76bd7e07e217..2782d5933e178 100644 --- a/drivers/platform/arm64/Kconfig +++ b/drivers/platform/arm64/Kconfig @@ -95,4 +95,6 @@ config NVIDIA_FFA_EC Say M or Y here to include this support. +source "drivers/platform/arm64/nvidia/Kconfig" + endif # ARM64_PLATFORM_DEVICES diff --git a/drivers/platform/arm64/Makefile b/drivers/platform/arm64/Makefile index 4edb84d5ae213..47733ec5f26ad 100644 --- a/drivers/platform/arm64/Makefile +++ b/drivers/platform/arm64/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_EC_HUAWEI_GAOKUN) += huawei-gaokun-ec.o obj-$(CONFIG_EC_LENOVO_YOGA_C630) += lenovo-yoga-c630.o obj-$(CONFIG_EC_LENOVO_YOGA_SLIM7X) += lenovo-yoga-slim7x.o obj-$(CONFIG_NVIDIA_FFA_EC) += nvidia-ffa-ec.o +obj-y += nvidia/ diff --git a/drivers/platform/arm64/nvidia/Kconfig b/drivers/platform/arm64/nvidia/Kconfig new file mode 100644 index 0000000000000..b12b290f30d4f --- /dev/null +++ b/drivers/platform/arm64/nvidia/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# NVIDIA ARM64 Platform-Specific Device Drivers +# + +config MTK_PCIE_HOTPLUG + tristate "CX7 PCIe Hotplug Driver" + depends on EINT_MTK + depends on PCI && ACPI + help + Say Y here to support PCIe device plug in/out detection. + It will disable PCIe link when plug out and enable + PCIe link after plug in. + + This is particularly useful for GB10 SoC. + + If unsure, say N. 
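
Returning to armpmu_register() above: the new has_smt flag is filled in by topology_core_has_smt(), which is not part of this excerpt. A minimal sketch of the question that predicate has to answer, built on the existing topology_sibling_cpumask() helper, is shown below; the helper actually added by this series may be implemented differently.

	/*
	 * Illustrative sketch only: topology_core_has_smt() is provided
	 * elsewhere in the series.  A core is treated as multi-threaded if
	 * the given CPU has at least one other thread sibling.
	 */
	static inline bool topology_core_has_smt(unsigned int cpu)
	{
		return cpumask_weight(topology_sibling_cpumask(cpu)) > 1;
	}
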
diff --git a/drivers/platform/arm64/nvidia/Makefile b/drivers/platform/arm64/nvidia/Makefile new file mode 100644 index 0000000000000..37cfbebb8d1af --- /dev/null +++ b/drivers/platform/arm64/nvidia/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for NVIDIA ARM64 platform-specific drivers +# +# CX7 PCIe Hotplug Driver +# Provides hotplug support for CX7 PCIe devices on GB10 SoC-based systems +# + +obj-$(CONFIG_MTK_PCIE_HOTPLUG) += mtk-pcie-hotplug.o diff --git a/drivers/platform/arm64/nvidia/mtk-pcie-hotplug.c b/drivers/platform/arm64/nvidia/mtk-pcie-hotplug.c new file mode 100644 index 0000000000000..06a84a29aa6fd --- /dev/null +++ b/drivers/platform/arm64/nvidia/mtk-pcie-hotplug.c @@ -0,0 +1,2324 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2014-2025 MediaTek Inc. + * Copyright (c) 2025-2026 NVIDIA Corporation + * + * CX7 PCIe Hotplug Driver + * + * Manages PCIe device hotplug using GPIO interrupts and ACPI resources. + * Supports cable insertion/removal detection and device power management. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HP_PORT_MAX 3 +#define HP_POLL_CNT_MAX 200 +#define MAX_VENDOR_DATA_LEN 16 +#define CX7_HP_MMIO_REGION_COUNT 5 /* TOP, PROTECT, CKM, MAC Port 0, MAC Port 1 */ +#define CX7_HP_MIN_GPIO_COUNT 4 /* Minimum required: BOOT, PRSNT, PERST, EN */ +#define PINCTRL_MAPPING_ENTRY_SIZE 5 /* dev_name, state, ctrl_dev, group, function */ +/* Indices for pinctrl mapping entry strings */ +#define PINCTRL_IDX_DEV_NAME 0 +#define PINCTRL_IDX_STATE 1 +#define PINCTRL_IDX_CTRL_DEV 2 +#define PINCTRL_IDX_GROUP 3 +#define PINCTRL_IDX_FUNCTION 4 + +/* Hardware timing requirements (in microseconds unless noted) */ +#define CX7_HP_DELAY_SHORT_US 10 /* Short delay for register writes */ +#define CX7_HP_DELAY_STANDARD_US 10000 /* Standard delay (10ms) */ +#define CX7_HP_DELAY_BUS_PROTECT_US 5000 /* Bus protection setup delay */ +#define CX7_HP_DELAY_PHY_RESET_US 3000 /* PHY reset delay */ +#define CX7_HP_DELAY_LINK_STABLE_MS 100 /* Link stabilization delay (ms) */ +#define CX7_HP_POLL_SLEEP_US 10000 /* Polling loop sleep interval */ + +#define PLUG_IN_EVT "HOTPLUG_STATE=plugin" +#define REMOVAL_EVT "HOTPLUG_STATE=removal" + +/* Bus protection stages to prevent PCIe core reset glitches */ +#define BUS_PROTECT_INIT 0 +#define BUS_PROTECT_CABLE_REMOVAL 1 +#define BUS_PROTECT_CABLE_PLUGIN 2 +#define BUS_PROTECT_CLEANUP 3 + +enum cx7_hp_state { + STATE_READY = 0, + STATE_PLUG_OUT, /* Cable plug-out */ + STATE_DEV_POWER_OFF, /* Device is powered off */ + STATE_PLUG_IN, /* Cable plug-in detected */ + STATE_DEV_POWER_ON, /* Device is powered on */ + STATE_DEV_FW_START, /* Device firmware is running */ + STATE_RESCAN, /* Device ready, can perform bus rescan */ + STATE_UNKNOWN +}; + +enum pcie_pin_index { + PCIE_PIN_BOOT = 0, /* Device boot status pin */ + PCIE_PIN_PRSNT, /* Presence detection pin */ + PCIE_PIN_PERST, /* PCIe reset pin */ + PCIE_PIN_EN, /* Power enable pin */ + PCIE_PIN_CLQ0, /* Clock request pin 0 */ + PCIE_PIN_CLQ1, /* Clock request pin 1 */ + PCIE_PIN_MAX +}; + +struct pcie_port_info { + int domain; + int bus; + int devfn; +}; + +struct rp_bus_mmio_top { + u32 ctrl; + u32 port_bits[HP_PORT_MAX]; + u32 update_bit; +}; + +struct rp_bus_mmio_protect { + u32 mode; + u32 enable; + u32 port_bits[HP_PORT_MAX]; +}; + +struct rp_bus_mmio_mac { + u32 init_ctrl; + u32 ltssm_bit; + u32 phy_rst_bit; +}; + +struct 
rp_bus_mmio_ckm { + u32 ctrl; + u32 disable_bit; +}; + +struct rp_bus_mmio_info { + struct rp_bus_mmio_top top; + struct rp_bus_mmio_protect protect; + struct rp_bus_mmio_mac mac; + struct rp_bus_mmio_ckm ckm; +}; + +struct gpio_acpi_context { + struct device *dev; + unsigned int debounce_timeout_us; + int pin; + int wake_capable; + int triggering; + int polarity; + unsigned long irq_flags; + int valid; + unsigned int connection_type; + char vendor_data[MAX_VENDOR_DATA_LEN + 1]; +}; + +struct cx7_hp_dev; + +/** + * struct cx7_hp_plat_data - Platform configuration data parsed from ACPI + * + * Platform-specific configuration parsed from ACPI devices: + * - RES0 device (PNP0C02): PCIe configuration and MMIO register offsets via _DSD + * - PEDE device (MTKP0001): Pinctrl mappings via _DSD + */ +struct cx7_hp_plat_data { + int port_nums; + struct pcie_port_info ports[HP_PORT_MAX]; + u32 vendor_id; + u32 device_id; + int num_devices; + struct rp_bus_mmio_info rp_bus_mmio; + u32 ltssm_reg; + u32 ltssm_l0_state; + int pin_nums; + struct pinctrl_map *parsed_pinmap; +}; + +struct cx7_hp_gpio_ctx { + struct gpio_desc *desc; + struct gpio_acpi_context *ctx; + struct cx7_hp_dev *hp_dev; +}; + +struct acpi_gpio_parse_context { + struct gpio_acpi_context *ctx; + struct cx7_hp_dev *hp_dev; +}; + +struct acpi_gpio_walk_context { + struct device *dev; + struct gpio_info { + unsigned int pin; + unsigned int connection_type; + unsigned int triggering; + unsigned int polarity; + unsigned int debounce_timeout; + unsigned int wake_capable; + char vendor_data[MAX_VENDOR_DATA_LEN + 1]; + char resource_source[16]; + unsigned int resource_source_index; + } gpios[PCIE_PIN_MAX]; + int count; +}; + +struct cx7_hp_acpi_mmio { + struct acpi_resource_fixed_memory32 + mmio_regions[CX7_HP_MMIO_REGION_COUNT]; + int count; + struct device *dev; +}; + +enum cx7_hp_debug_val { + CX7_HP_DEBUG_PLUG_OUT = 0, + CX7_HP_DEBUG_PLUG_IN, + CX7_HP_DEBUG_MAX_VAL +}; + +struct cx7_hp_mmio_runtime { + void __iomem *top_base; + void __iomem *protect_base; + void __iomem *ckm_base; + void __iomem *mac_port_base[HP_PORT_MAX]; +}; + +/** + * cx7_hp_dev - Hotplug device structure + * + * ACPI resource sources: + * - MMIO addresses: RES0 device (PNP0C02) _CRS, stored in mmio field + * - GPIO resources: PEDE device (MTKP0001) _CRS, stored in pins field + */ +struct cx7_hp_dev { + struct cx7_hp_gpio_ctx *pins; + struct cx7_hp_plat_data *pd; + struct platform_device *pdev; + enum cx7_hp_state state; + int gpio_count; + int boot_pin; + int prsnt_pin; + enum cx7_hp_debug_val debug_state; + bool hotplug_enabled; + spinlock_t lock; + struct pci_dev *cached_root_ports[HP_PORT_MAX]; + struct cx7_hp_mmio_runtime mmio; + struct gpio_device *gdev; + struct notifier_block pci_notifier; +}; + +/* ACPI _DSD device properties GUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */ +static const guid_t device_properties_guid = +GUID_INIT(0xdaffd814, 0x6eba, 0x4d8c, + 0x8a, 0x91, 0xbc, 0x9b, + 0xbf, 0x4a, 0xa3, 0x01); + +/** + * cx7_hp_parse_pinctrl_config_dsd - Parse pinctrl configuration from PEDE device _DSD + * @hp_dev: hotplug device + * + * Parses pin-nums and pinctrl-mappings from _DSD. 
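+ * Each "pinctrl-mappings" entry is a package of five strings,
+ * {dev_name, state, ctrl_dev, group, function}, which is turned into a
+ * PIN_MAP_TYPE_MUX_GROUP mapping. A hypothetical _DSD fragment could look
+ * like the following (the strings are illustrative placeholders, not taken
+ * from real firmware; only the "clkreqn" state name is used by this driver):
+ *
+ *   Package () { "pin-nums", 1 },
+ *   Package () { "pinctrl-mappings", Package () {
+ *     Package () { "pcie-hp-consumer-dev", "clkreqn", "pinctrl-controller-dev",
+ *                  "clkreq_grp", "clkreq_func" },
+ *   } },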
+ * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_parse_pinctrl_config_dsd(struct cx7_hp_dev *hp_dev) +{ + struct acpi_device *adev; + struct device *dev = &hp_dev->pdev->dev; + const union acpi_object *mappings_pkg, *mapping_entry; + struct pinctrl_map *pinmap; + u32 pin_nums = 0; + int k; + const char *strings[PINCTRL_MAPPING_ENTRY_SIZE]; + + adev = ACPI_COMPANION(dev); + if (!adev) { + dev_err(dev, "Failed to get ACPI companion device\n"); + return -ENODEV; + } + + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_status status; + const union acpi_object *dsd_pkg, *props_pkg = NULL; + int i, j; + + status = acpi_evaluate_object_typed(adev->handle, "_DSD", NULL, &buffer, + ACPI_TYPE_PACKAGE); + if (ACPI_FAILURE(status)) { + dev_err(dev, "Failed to evaluate _DSD: %s\n", + acpi_format_exception(status)); + return -ENODEV; + } + + dsd_pkg = buffer.pointer; + if (!dsd_pkg || dsd_pkg->type != ACPI_TYPE_PACKAGE) { + dev_err(dev, "Invalid _DSD package\n"); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + /* Find Device Properties GUID package */ + for (i = 0; i + 1 < dsd_pkg->package.count; i += 2) { + const union acpi_object *guid = &dsd_pkg->package.elements[i]; + const union acpi_object *pkg = + &dsd_pkg->package.elements[i + 1]; + + /* Verify GUID matches Device Properties GUID */ + if (guid->type == ACPI_TYPE_BUFFER && guid->buffer.length == 16 && + pkg->type == ACPI_TYPE_PACKAGE && + guid_equal((guid_t *)guid->buffer.pointer, + &device_properties_guid)) { + props_pkg = pkg; + break; + } + } + + if (!props_pkg) { + dev_err(dev, + "Device Properties GUID package not found in _DSD\n"); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + + for (j = 0; j < props_pkg->package.count; j++) { + const union acpi_object *prop = &props_pkg->package.elements[j]; + + if (prop->type != ACPI_TYPE_PACKAGE || + prop->package.count != 2 || + prop->package.elements[0].type != ACPI_TYPE_STRING) + continue; + + const char *prop_name = + prop->package.elements[0].string.pointer; + const union acpi_object *prop_value = + &prop->package.elements[1]; + + if (!strcmp(prop_name, "pin-nums")) { + if (prop_value->type == ACPI_TYPE_INTEGER) { + pin_nums = prop_value->integer.value; + } + } else if (!strcmp(prop_name, "pinctrl-mappings")) { + if (prop_value->type == ACPI_TYPE_PACKAGE) + mappings_pkg = prop_value; + } + } + + if (pin_nums == 0) { + hp_dev->pd->pin_nums = 0; + ACPI_FREE(buffer.pointer); + return 0; + } + + if (!mappings_pkg) { + dev_err(dev, + "Missing required _DSD property: pinctrl-mappings\n"); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + + if (mappings_pkg->package.count != pin_nums) { + dev_err(dev, + "pinctrl-mappings count mismatch: expected %u, got %u\n", + pin_nums, mappings_pkg->package.count); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + + /* Allocate pinmap array */ + pinmap = devm_kcalloc(dev, pin_nums, sizeof(*pinmap), GFP_KERNEL); + if (!pinmap) { + ACPI_FREE(buffer.pointer); + return -ENOMEM; + } + + /* Parse each mapping entry */ + for (k = 0; k < pin_nums; k++) { + mapping_entry = &mappings_pkg->package.elements[k]; + if (mapping_entry->type != ACPI_TYPE_PACKAGE || + mapping_entry->package.count != ARRAY_SIZE(strings)) { + dev_err(dev, + "Invalid pinctrl mapping entry %d: expected Package(%zu), " + "got %s(count=%u)\n", + k, ARRAY_SIZE(strings), + mapping_entry->type == ACPI_TYPE_PACKAGE ? + "Package" : "non-Package", + mapping_entry->type == ACPI_TYPE_PACKAGE ? 
+ mapping_entry->package.count : 0); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + + /* Extract strings: dev_name, state, ctrl_dev, group, function */ + for (int l = 0; l < ARRAY_SIZE(strings); l++) { + if (mapping_entry->package.elements[l].type != + ACPI_TYPE_STRING) { + dev_err(dev, + "Mapping entry %d element %d is not a string\n", + k, l); + ACPI_FREE(buffer.pointer); + return -EINVAL; + } + strings[l] = + mapping_entry->package.elements[l].string.pointer; + } + + /* Populate pinctrl_map structure */ + pinmap[k].dev_name = + devm_kstrdup(dev, strings[PINCTRL_IDX_DEV_NAME], + GFP_KERNEL); + pinmap[k].name = + devm_kstrdup(dev, strings[PINCTRL_IDX_STATE], GFP_KERNEL); + pinmap[k].type = PIN_MAP_TYPE_MUX_GROUP; + pinmap[k].ctrl_dev_name = + devm_kstrdup(dev, strings[PINCTRL_IDX_CTRL_DEV], + GFP_KERNEL); + pinmap[k].data.mux.group = + devm_kstrdup(dev, strings[PINCTRL_IDX_GROUP], GFP_KERNEL); + pinmap[k].data.mux.function = + devm_kstrdup(dev, strings[PINCTRL_IDX_FUNCTION], + GFP_KERNEL); + + if (!pinmap[k].dev_name || !pinmap[k].name || + !pinmap[k].ctrl_dev_name || !pinmap[k].data.mux.group || + !pinmap[k].data.mux.function) { + dev_err(dev, + "Failed to allocate memory for mapping %d\n", + k); + ACPI_FREE(buffer.pointer); + return -ENOMEM; + } + } + + hp_dev->pd->pin_nums = pin_nums; + hp_dev->pd->parsed_pinmap = pinmap; + ACPI_FREE(buffer.pointer); + dev_dbg(dev, "Successfully parsed %u pinctrl mappings from ACPI\n", + pin_nums); + return 0; +} + +/** + * cx7_hp_pinctrl_init - Register pinctrl mappings for the device + * @hp_dev: hotplug device + * + * Parses pinctrl mappings from _DSD and registers them. + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_pinctrl_init(struct cx7_hp_dev *hp_dev) +{ + int ret; + + ret = cx7_hp_parse_pinctrl_config_dsd(hp_dev); + if (ret) { + dev_err(&hp_dev->pdev->dev, + "Failed to parse pinctrl configuration from ACPI: %d\n", + ret); + return ret; + } + + if (!hp_dev->pd->pin_nums) + return 0; + + ret = + pinctrl_register_mappings(hp_dev->pd->parsed_pinmap, + hp_dev->pd->pin_nums); + if (ret) { + dev_err(&hp_dev->pdev->dev, + "Failed to register pinctrl mappings\n"); + return ret; + } + + dev_dbg(&hp_dev->pdev->dev, "Registered %u pinctrl mappings\n", + hp_dev->pd->pin_nums); + return 0; +} + +/** + * cx7_hp_pinctrl_remove - Unregister pinctrl mappings + * @hp_dev: hotplug device + */ +static void cx7_hp_pinctrl_remove(struct cx7_hp_dev *hp_dev) +{ + if (!hp_dev->pd->pin_nums) + return; + + pinctrl_unregister_mappings(hp_dev->pd->parsed_pinmap); +} + +/** + * cx7_hp_change_pinctrl_state - Change pinctrl state + * @hp_dev: hotplug device + * @new_state: new pinctrl state name + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_change_pinctrl_state(struct cx7_hp_dev *hp_dev, + const char *new_state) +{ + struct pinctrl *pinctrl; + struct pinctrl_state *state; + int ret; + + pinctrl = devm_pinctrl_get(&hp_dev->pdev->dev); + if (IS_ERR(pinctrl)) { + dev_err(&hp_dev->pdev->dev, "Failed to get pinctrl\n"); + return PTR_ERR(pinctrl); + } + + state = pinctrl_lookup_state(pinctrl, new_state); + if (IS_ERR(state)) { + dev_err(&hp_dev->pdev->dev, "Failed to lookup state:%s\n", + new_state); + return PTR_ERR(state); + } + + ret = pinctrl_select_state(pinctrl, state); + if (ret) { + dev_err(&hp_dev->pdev->dev, + "Failed to select pinctrl state:%s\n", new_state); + return ret; + } + + return 0; +} + +/** + * cx7_hp_send_uevent - Send uevent to userspace + * @hp_dev: hotplug device + * @msg: 
uevent message string + */ +static void cx7_hp_send_uevent(struct cx7_hp_dev *hp_dev, const char *msg) +{ + char *uevent = NULL; + char *envp[2]; + + uevent = kasprintf(GFP_KERNEL, msg); + if (!uevent) { + dev_err(&hp_dev->pdev->dev, + "Failed to allocate uevent string\n"); + return; + } + + envp[0] = uevent; + envp[1] = NULL; + + if (kobject_uevent_env(&hp_dev->pdev->dev.kobj, KOBJ_CHANGE, envp)) + dev_err(&hp_dev->pdev->dev, "Failed to send uevent\n"); + + kfree(uevent); +} + +/** + * cx7_hp_reg_update_bits - Update specific bits in a register + * @base: MMIO base address + * @offset: Register offset + * @mask: Bits to modify + * @set: true to set bits, false to clear bits + */ +static inline void cx7_hp_reg_update_bits(void __iomem *base, u32 offset, + u32 mask, bool set) +{ + u32 val = readl(base + offset); + + if (set) + val |= mask; + else + val &= ~mask; + + writel(val, base + offset); +} + +/** + * cx7_hp_toggle_update_bit - Toggle control register update bit + * @base: MMIO base address + * @ctrl_offset: Control register offset + * @bits: Bits to set/clear before toggling update + * @update_bit: Update bit mask + * @set: true to set bits, false to clear bits + * + * Performs the sequence: modify bits, clear update bit, set update bit + */ +static void cx7_hp_toggle_update_bit(void __iomem *base, u32 ctrl_offset, + u32 bits, u32 update_bit, bool set) +{ + cx7_hp_reg_update_bits(base, ctrl_offset, bits, set); + cx7_hp_reg_update_bits(base, ctrl_offset, update_bit, false); + cx7_hp_reg_update_bits(base, ctrl_offset, update_bit, true); +} + +/** + * cx7_hp_bus_protect_enable - Enable bus protection for a port + * @dev: hotplug device + * @port_idx: Port index + */ +static void cx7_hp_bus_protect_enable(struct cx7_hp_dev *dev, int port_idx) +{ + struct rp_bus_mmio_info *mmio_info = &dev->pd->rp_bus_mmio; + u32 port_bit = mmio_info->protect.port_bits[port_idx]; + + cx7_hp_reg_update_bits(dev->mmio.protect_base, + mmio_info->protect.mode, port_bit, true); + cx7_hp_reg_update_bits(dev->mmio.protect_base, + mmio_info->protect.enable, port_bit, true); +} + +/** + * cx7_hp_bus_protect_disable - Disable bus protection for a port + * @dev: hotplug device + * @port_idx: Port index + */ +static void cx7_hp_bus_protect_disable(struct cx7_hp_dev *dev, int port_idx) +{ + struct rp_bus_mmio_info *mmio_info = &dev->pd->rp_bus_mmio; + u32 port_bit = mmio_info->protect.port_bits[port_idx]; + + cx7_hp_reg_update_bits(dev->mmio.protect_base, + mmio_info->protect.enable, port_bit, false); + cx7_hp_reg_update_bits(dev->mmio.protect_base, + mmio_info->protect.mode, port_bit, false); +} + +/** + * cx7_hp_ckm_control - Control clock module + * @dev: hotplug device + * @disable: true to disable clock, false to enable + */ +static void cx7_hp_ckm_control(struct cx7_hp_dev *dev, bool disable) +{ + struct rp_bus_mmio_info *mmio_info = &dev->pd->rp_bus_mmio; + + if (!dev->mmio.ckm_base) + return; + + cx7_hp_reg_update_bits(dev->mmio.ckm_base, mmio_info->ckm.ctrl, + mmio_info->ckm.disable_bit, disable); +} + +/** + * cx7_hp_parse_mmio_resources - ACPI resource callback for parsing MMIO from _CRS + * @ares: ACPI resource being processed + * @data: pointer to cx7_hp_acpi_mmio structure + * + * Returns: AE_OK to continue iteration, AE_ERROR on error + */ +static acpi_status cx7_hp_parse_mmio_resources(struct acpi_resource *ares, + void *data) +{ + struct cx7_hp_acpi_mmio *parsed = data; + + switch (ares->type) { + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + if (parsed->count >= CX7_HP_MMIO_REGION_COUNT) { + 
dev_warn(parsed->dev, + "More than %d MMIO regions found in platform configuration device, ignoring extras\n", + CX7_HP_MMIO_REGION_COUNT); + break; + } + parsed->mmio_regions[parsed->count] = ares->data.fixed_memory32; + parsed->count++; + break; + default: + break; + } + + return AE_OK; +} + +/** + * cx7_hp_find_pcie_config_device - Find PCIe configuration device by HID + * + * Finds the ACPI device that provides PCIe configuration via _DSD properties + * and MMIO resources via _CRS. + * + * Returns: acpi_device pointer on success (with reference), NULL on failure + */ +static struct acpi_device *cx7_hp_find_pcie_config_device(void) +{ + return acpi_dev_get_first_match_dev("PNP0C02", NULL, -1); +} + +/** + * cx7_hp_parse_pcie_config_dsd - Parse PCIe configuration from _DSD + * @pdev: platform device + * @pd: platform data to populate + * + * Parses PCIe MMIO register offsets, bit positions, port configuration, and PCIe device + * identification from PCIe configuration device _DSD. + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_parse_pcie_config_dsd(struct platform_device *pdev, + struct cx7_hp_plat_data *pd) +{ + struct acpi_device *config_adev; + struct device *dev = &pdev->dev; + u32 val, bit1; + + config_adev = cx7_hp_find_pcie_config_device(); + if (!config_adev) { + dev_err(dev, + "Platform configuration device (PNP0C02) not found - _DSD is required\n"); + return -ENODEV; + } + + if (!acpi_dev_has_props(config_adev)) { + dev_err(dev, + "Platform configuration device has no _DSD properties. Check DSDT.\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "mac-init-ctrl-offset", &val)) { + dev_err(dev, + "Missing required _DSD property: mac-init-ctrl-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.mac.init_ctrl = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "mac-ltssm-bit", &val)) { + dev_err(dev, "Missing required _DSD property: mac-ltssm-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.mac.ltssm_bit = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "mac-phy-rst-bit", &val)) { + dev_err(dev, + "Missing required _DSD property: mac-phy-rst-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.mac.phy_rst_bit = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "top-ctrl-offset", &val)) { + dev_err(dev, + "Missing required _DSD property: top-ctrl-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.top.ctrl = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "top-update-bit", &val)) { + dev_err(dev, + "Missing required _DSD property: top-update-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.top.update_bit = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "top-port0-bit", &val)) { + dev_err(dev, "Missing required _DSD property: top-port0-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.top.port_bits[0] = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "top-port1-bit", &val)) { + dev_err(dev, "Missing required _DSD property: top-port1-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.top.port_bits[1] = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "protect-mode-offset", &val)) { 
+ dev_err(dev, + "Missing required _DSD property: protect-mode-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.protect.mode = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "protect-enable-offset", &val)) { + dev_err(dev, + "Missing required _DSD property: protect-enable-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.protect.enable = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "protect-port0-bit", &val)) { + dev_err(dev, + "Missing required _DSD property: protect-port0-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.protect.port_bits[0] = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "protect-port1-bit", &val)) { + dev_err(dev, + "Missing required _DSD property: protect-port1-bit\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.protect.port_bits[1] = BIT(val); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "ckm-ctrl-offset", &val)) { + dev_err(dev, + "Missing required _DSD property: ckm-ctrl-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.ckm.ctrl = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "ckm-disable-bit0", &val)) { + dev_err(dev, + "Missing required _DSD property: ckm-disable-bit0\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "ckm-disable-bit1", &bit1)) { + dev_err(dev, + "Missing required _DSD property: ckm-disable-bit1\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->rp_bus_mmio.ckm.disable_bit = BIT(val) | BIT(bit1); + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "ltssm-reg-offset", &val)) { + dev_err(dev, + "Missing required _DSD property: ltssm-reg-offset\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ltssm_reg = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "ltssm-l0-state", &val)) { + dev_err(dev, + "Missing required _DSD property: ltssm-l0-state\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ltssm_l0_state = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port-nums", &val)) { + dev_err(dev, "Missing required _DSD property: port-nums\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + if (val == 0 || val > HP_PORT_MAX) { + dev_err(dev, + "Invalid _DSD property port-nums: %u (must be 1-%d)\n", + val, HP_PORT_MAX); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->port_nums = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port0-domain", &val)) { + dev_err(dev, "Missing required _DSD property: port0-domain\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[0].domain = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port0-bus", &val)) { + dev_err(dev, "Missing required _DSD property: port0-bus\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[0].bus = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port0-devfn", &val)) { + dev_err(dev, "Missing required _DSD property: port0-devfn\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[0].devfn = val; + + if (pd->port_nums >= 2) { + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port1-domain", &val)) { + dev_err(dev, + "Missing required _DSD property: port1-domain\n"); + 
acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[1].domain = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port1-bus", &val)) { + dev_err(dev, + "Missing required _DSD property: port1-bus\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[1].bus = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "port1-devfn", &val)) { + dev_err(dev, + "Missing required _DSD property: port1-devfn\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->ports[1].devfn = val; + } + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "vendor-id", &val)) { + dev_err(dev, "Missing required _DSD property: vendor-id\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->vendor_id = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "device-id", &val)) { + dev_err(dev, "Missing required _DSD property: device-id\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->device_id = val; + + if (fwnode_property_read_u32 + (acpi_fwnode_handle(config_adev), "num-devices", &val)) { + dev_err(dev, "Missing required _DSD property: num-devices\n"); + acpi_dev_put(config_adev); + return -EINVAL; + } + pd->num_devices = val; + + dev_dbg(dev, "Successfully parsed all required _DSD properties\n"); + + acpi_dev_put(config_adev); + return 0; +} + +/** + * cx7_hp_parse_mmio_resources_from_acpi - Parse MMIO regions from _CRS + * @dev: hotplug device + * @parsed: pointer to parsed MMIO structure + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_parse_mmio_resources_from_acpi(struct cx7_hp_dev *dev, + struct cx7_hp_acpi_mmio + *parsed) +{ + struct acpi_device *config_adev; + acpi_status status; + int ret = 0; + + if (!dev || !dev->pdev) { + return -EINVAL; + } + + config_adev = cx7_hp_find_pcie_config_device(); + if (!config_adev) + return -ENODEV; + + parsed->count = 0; + memset(parsed->mmio_regions, 0, sizeof(parsed->mmio_regions)); + + status = + acpi_walk_resources(config_adev->handle, METHOD_NAME__CRS, + cx7_hp_parse_mmio_resources, parsed); + if (ACPI_FAILURE(status)) { + dev_err(&dev->pdev->dev, + "Failed to walk platform configuration resources: %s\n", + acpi_format_exception(status)); + ret = -ENODEV; + goto out; + } + + if (parsed->count < CX7_HP_MMIO_REGION_COUNT) { + dev_warn(&dev->pdev->dev, + "Expected %d MMIO regions from platform configuration device, found %d\n", + CX7_HP_MMIO_REGION_COUNT, parsed->count); + ret = -ENODEV; + goto out; + } + +out: + acpi_dev_put(config_adev); + return ret; +} + +/** + * cx7_hp_map_mmio_resources - Map all MMIO regions from ACPI _CRS + * @dev: hotplug device + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_map_mmio_resources(struct cx7_hp_dev *dev) +{ + struct platform_device *pdev = dev->pdev; + struct cx7_hp_acpi_mmio parsed = {.count = 0, .dev = &pdev->dev }; + int ret; + int i; + + ret = cx7_hp_parse_mmio_resources_from_acpi(dev, &parsed); + if (ret) { + dev_err(&pdev->dev, + "Failed to get MMIO regions from platform configuration device\n"); + return ret; + } + + dev_dbg(&pdev->dev, "Found %d MMIO regions in _CRS, mapping...\n", + parsed.count); + + int mapped_count = 0; + for (i = 0; i < parsed.count; i++) { + void __iomem *base = NULL; + u32 addr = parsed.mmio_regions[i].address; + u32 size = parsed.mmio_regions[i].address_length; + + switch (i) { + case 0: + if (dev->pd->port_nums >= 1) { + base = devm_ioremap(&pdev->dev, addr, size); + if (!base) { + dev_err(&pdev->dev, 
+ "Failed to map MAC Port 0 region (0x%08x)\n", + addr); + return -ENOMEM; + } + dev->mmio.mac_port_base[0] = base; + mapped_count++; + } + break; + case 1: + if (dev->pd->port_nums >= 2) { + base = devm_ioremap(&pdev->dev, addr, size); + if (!base) { + dev_err(&pdev->dev, + "Failed to map MAC Port 1 region (0x%08x)\n", + addr); + return -ENOMEM; + } + dev->mmio.mac_port_base[1] = base; + mapped_count++; + } + break; + case 2: + base = devm_ioremap(&pdev->dev, addr, size); + if (!base) { + dev_err(&pdev->dev, + "Failed to map TOP region (0x%08x)\n", + addr); + return -ENOMEM; + } + dev->mmio.top_base = base; + mapped_count++; + break; + case 3: + base = devm_ioremap(&pdev->dev, addr, size); + if (!base) { + dev_err(&pdev->dev, + "Failed to map PROTECT region (0x%08x)\n", + addr); + return -ENOMEM; + } + dev->mmio.protect_base = base; + mapped_count++; + break; + case 4: + base = devm_ioremap(&pdev->dev, addr, size); + if (!base) { + dev_err(&pdev->dev, + "Failed to map CKM region (0x%08x)\n", + addr); + return -ENOMEM; + } + dev->mmio.ckm_base = base; + mapped_count++; + break; + default: + dev_warn(&pdev->dev, + "Unexpected MMIO region at 0x%08x (size 0x%x), skipping\n", + addr, size); + break; + } + } + + if (!dev->mmio.top_base || !dev->mmio.protect_base + || !dev->mmio.ckm_base || (dev->pd->port_nums >= 1 + && !dev->mmio.mac_port_base[0]) + || (dev->pd->port_nums >= 2 && !dev->mmio.mac_port_base[1])) { + dev_err(&pdev->dev, + "Required MMIO regions not mapped from ACPI _CRS (mapped %d)\n", + mapped_count); + if (!dev->mmio.top_base) + dev_err(&pdev->dev, " Missing: TOP\n"); + if (!dev->mmio.protect_base) + dev_err(&pdev->dev, " Missing: PROTECT\n"); + if (!dev->mmio.ckm_base) + dev_err(&pdev->dev, " Missing: CKM\n"); + if (dev->pd->port_nums >= 1 && !dev->mmio.mac_port_base[0]) + dev_err(&pdev->dev, + " Missing: MAC Port 0 (port_nums=%d)\n", + dev->pd->port_nums); + if (dev->pd->port_nums >= 2 && !dev->mmio.mac_port_base[1]) + dev_err(&pdev->dev, + " Missing: MAC Port 1 (port_nums=%d)\n", + dev->pd->port_nums); + dev->mmio.top_base = NULL; + dev->mmio.protect_base = NULL; + dev->mmio.ckm_base = NULL; + for (i = 0; i < HP_PORT_MAX; i++) + dev->mmio.mac_port_base[i] = NULL; + return -ENODEV; + } + + dev_dbg(&pdev->dev, + "Successfully mapped all MMIO regions from ACPI _CRS\n"); + return 0; +} + +/** + * cx7_hp_rp_bus_protect - Bus protection handler + * @dev: hotplug device + * @port_idx: port index (0-based) + * @stage: protection stage (BUS_PROTECT_INIT, BUS_PROTECT_CLEANUP, etc.) 
+ */ +static void cx7_hp_rp_bus_protect(struct cx7_hp_dev *dev, int port_idx, + int stage) +{ + switch (stage) { + case BUS_PROTECT_INIT: + { + int ret; + + ret = cx7_hp_map_mmio_resources(dev); + if (ret) { + dev_err(&dev->pdev->dev, + "Failed to map MMIO resources during bus init: %d\n", + ret); + return; + } + } + return; + + case BUS_PROTECT_CLEANUP: + { + int i; + + for (i = 0; i < HP_PORT_MAX; i++) { + if (dev->mmio.mac_port_base[i]) + dev->mmio.mac_port_base[i] = NULL; + } + if (dev->mmio.top_base) + dev->mmio.top_base = NULL; + if (dev->mmio.protect_base) + dev->mmio.protect_base = NULL; + if (dev->mmio.ckm_base) + dev->mmio.ckm_base = NULL; + } + return; + + case BUS_PROTECT_CABLE_REMOVAL: + case BUS_PROTECT_CABLE_PLUGIN: + { + struct rp_bus_mmio_info *mmio_info = + &dev->pd->rp_bus_mmio; + void __iomem *mac_base; + + if (port_idx >= dev->pd->port_nums) + return; + + mac_base = dev->mmio.mac_port_base[port_idx]; + if (!mac_base) + return; + + if (stage == BUS_PROTECT_CABLE_REMOVAL) { + cx7_hp_reg_update_bits(mac_base, + mmio_info->mac.init_ctrl, + mmio_info->mac.ltssm_bit, + false); + cx7_hp_reg_update_bits(mac_base, + mmio_info->mac.init_ctrl, + mmio_info->mac. + phy_rst_bit, false); + return; + } + + cx7_hp_toggle_update_bit(dev->mmio.top_base, + mmio_info->top.ctrl, + mmio_info->top. + port_bits[port_idx], + mmio_info->top.update_bit, + false); + udelay(CX7_HP_DELAY_SHORT_US); + + cx7_hp_bus_protect_enable(dev, port_idx); + usleep_range(CX7_HP_DELAY_BUS_PROTECT_US, + CX7_HP_DELAY_BUS_PROTECT_US + 1000); + + cx7_hp_reg_update_bits(mac_base, + mmio_info->mac.init_ctrl, + mmio_info->mac.phy_rst_bit, + true); + cx7_hp_reg_update_bits(mac_base, + mmio_info->mac.init_ctrl, + mmio_info->mac.ltssm_bit, true); + usleep_range(CX7_HP_DELAY_PHY_RESET_US, + CX7_HP_DELAY_PHY_RESET_US + 1000); + + cx7_hp_bus_protect_disable(dev, port_idx); + + cx7_hp_toggle_update_bit(dev->mmio.top_base, + mmio_info->top.ctrl, + mmio_info->top. + port_bits[port_idx], + mmio_info->top.update_bit, + true); + } + break; + + default: + dev_warn(&dev->pdev->dev, "Unknown bus protect stage: %d\n", + stage); + break; + } +} + +/** + * retrain_pcie_link - Retrain PCIe link + * @dev: PCI device + */ +static void retrain_pcie_link(struct pci_dev *dev) +{ + u16 link_control, lnksta; + int pos, i = 0; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) { + dev_err(&dev->dev, "PCIe capability not found\n"); + return; + } + + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &link_control); + link_control |= PCI_EXP_LNKCTL_RL; + + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, link_control); + + while (i < HP_POLL_CNT_MAX) { + i++; + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + if (lnksta & PCI_EXP_LNKSTA_DLLLA) + break; + usleep_range(CX7_HP_POLL_SLEEP_US, CX7_HP_POLL_SLEEP_US + 1000); + } + + pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); +} + +/** + * get_port_root_port - Get PCI root port device for a port + * @hp_dev: hotplug device + * @port_idx: port index + * + * Returns cached or newly found root port, or NULL if not found. 
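+ * The reference taken via pci_get_domain_bus_and_slot() is kept in
+ * cached_root_ports[] and released in cx7_hp_remove(), so callers must not
+ * pci_dev_put() the returned device.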
+ */ +static struct pci_dev *get_port_root_port(struct cx7_hp_dev *hp_dev, + int port_idx) +{ + struct pcie_port_info *port; + + if (!hp_dev->pd || port_idx >= hp_dev->pd->port_nums) + return NULL; + + port = &hp_dev->pd->ports[port_idx]; + + if (!hp_dev->cached_root_ports[port_idx]) { + hp_dev->cached_root_ports[port_idx] = + pci_get_domain_bus_and_slot(port->domain, + port->bus, port->devfn); + if (!hp_dev->cached_root_ports[port_idx]) { + dev_warn(&hp_dev->pdev->dev, + "Root port not found for domain %d bus %d\n", + port->domain, port->bus); + return NULL; + } + } + + return hp_dev->cached_root_ports[port_idx]; +} + +/** + * remove_device - Remove PCIe devices and power down hardware + * @dev: hotplug device + */ +static void remove_device(struct cx7_hp_dev *dev) +{ + int i; + + dev_info(&dev->pdev->dev, "Cable removal\n"); + + for (i = 0; i < dev->pd->port_nums; i++) + cx7_hp_rp_bus_protect(dev, i, BUS_PROTECT_CABLE_REMOVAL); + + gpiod_set_value(dev->pins[PCIE_PIN_PERST].desc, 0); + cx7_hp_change_pinctrl_state(dev, "default"); + cx7_hp_ckm_control(dev, true); + gpiod_set_value(dev->pins[PCIE_PIN_EN].desc, 0); +} + +/** + * polling_link_to_l0 - Poll until all PCIe ports reach L0 state + * @dev: hotplug device + * + * Returns: 0 on success, negative error code on failure + */ +static int polling_link_to_l0(struct cx7_hp_dev *dev) +{ + struct pci_dev *pci_dev; + u32 ltssm_reg; + u32 l0_state; + u32 ltssm_vals[HP_PORT_MAX] = { 0 }; + int count = 0; + int i; + bool all_l0; + + ltssm_reg = dev->pd->ltssm_reg; + l0_state = dev->pd->ltssm_l0_state; + + if (!ltssm_reg || !l0_state) + return 0; /* Skip if not configured */ + + /* Poll until all ports reach L0 state */ + all_l0 = false; + while (!all_l0) { + all_l0 = true; + + for (i = 0; i < dev->pd->port_nums; i++) { + pci_dev = get_port_root_port(dev, i); + if (!pci_dev) { + all_l0 = false; + continue; + } + + pci_read_config_dword(pci_dev, ltssm_reg, + <ssm_vals[i]); + if ((ltssm_vals[i] & l0_state) != l0_state) + all_l0 = false; + } + + if (all_l0) + break; + + usleep_range(CX7_HP_POLL_SLEEP_US, CX7_HP_POLL_SLEEP_US + 1000); + count++; + + if (count > HP_POLL_CNT_MAX) { + dev_err(&dev->pdev->dev, + "Timeout waiting for link to reach L0 (reached max count)\n"); + break; + } + } + + if (count > HP_POLL_CNT_MAX) { + return -ETIMEDOUT; + } + + return 0; +} + +/** + * rescan_device - Rescan PCIe bus to discover devices + * @dev: hotplug device + * + * Returns: 0 on success, negative error code on failure + */ +static int rescan_device(struct cx7_hp_dev *dev) +{ + struct pci_dev *pci_dev; + int i, err; + + err = cx7_hp_change_pinctrl_state(dev, "clkreqn"); + if (err) + return err; + + cx7_hp_ckm_control(dev, false); + usleep_range(CX7_HP_DELAY_STANDARD_US, CX7_HP_DELAY_STANDARD_US + 1000); + + for (i = 0; i < dev->pd->port_nums; i++) { + pci_dev = get_port_root_port(dev, i); + if (!pci_dev) + continue; + + err = pm_runtime_resume_and_get(&pci_dev->dev); + if (err < 0) { + dev_err(&dev->pdev->dev, + "Runtime resume failed for %s: %d\n", + pci_name(pci_dev), err); + } + } + + gpiod_set_value(dev->pins[PCIE_PIN_PERST].desc, 1); + + for (i = 0; i < dev->pd->port_nums; i++) + cx7_hp_rp_bus_protect(dev, i, BUS_PROTECT_CABLE_PLUGIN); + + err = polling_link_to_l0(dev); + if (err) + return err; + + for (i = 0; i < dev->pd->port_nums; i++) { + pci_dev = get_port_root_port(dev, i); + if (pci_dev) + retrain_pcie_link(pci_dev); + } + + msleep(CX7_HP_DELAY_LINK_STABLE_MS); + + return 0; +} + +/** + * cx7_hp_work - Work queue handler for hotplug state machine + 
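+ * (BOOT pin edges advance the state machine in hotplug_irq_handler():
+ * STATE_PLUG_IN -> STATE_DEV_POWER_ON -> STATE_DEV_FW_START ->
+ * STATE_RESCAN, then STATE_READY after a successful rescan, while
+ * STATE_PLUG_OUT -> STATE_DEV_POWER_OFF on removal; the PRSNT pin only
+ * emits plug-in/removal uevents.)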
* @irq: interrupt number + * @dev_id: GPIO context pointer + * + * Processes hotplug state transitions based on current state. + */ +static irqreturn_t cx7_hp_work(int irq, void *dev_id) +{ + struct cx7_hp_gpio_ctx *app_ctx = dev_id; + struct cx7_hp_dev *hp_dev; + enum cx7_hp_state state; + unsigned long flags; + int ret; + + if (!app_ctx || !app_ctx->hp_dev) + return IRQ_NONE; + + hp_dev = app_ctx->hp_dev; + + spin_lock_irqsave(&hp_dev->lock, flags); + if (!hp_dev->hotplug_enabled) { + spin_unlock_irqrestore(&hp_dev->lock, flags); + return IRQ_HANDLED; + } + state = hp_dev->state; + spin_unlock_irqrestore(&hp_dev->lock, flags); + + switch (state) { + case STATE_PLUG_OUT: + remove_device(hp_dev); + break; + case STATE_PLUG_IN: + dev_info(&hp_dev->pdev->dev, "Cable plugin\n"); + gpiod_set_value(hp_dev->pins[PCIE_PIN_EN].desc, 1); + break; + case STATE_DEV_POWER_OFF: + case STATE_DEV_POWER_ON: + case STATE_DEV_FW_START: + break; + case STATE_RESCAN: + ret = rescan_device(hp_dev); + spin_lock_irqsave(&hp_dev->lock, flags); + if (ret) + dev_err(app_ctx->ctx->dev, "Rescan failed: %d\n", ret); + else + hp_dev->state = STATE_READY; + spin_unlock_irqrestore(&hp_dev->lock, flags); + break; + default: + dev_err(app_ctx->ctx->dev, "Unknown state: %d\n", state); + break; + } + + return IRQ_HANDLED; +} + +/** + * hotplug_irq_handler - GPIO interrupt handler for hotplug events + * @irq: interrupt number + * @dev_id: GPIO context pointer + * + * Handles presence detection and boot status GPIO interrupts. + */ +static irqreturn_t hotplug_irq_handler(int irq, void *dev_id) +{ + struct cx7_hp_gpio_ctx *app_ctx = dev_id; + struct cx7_hp_dev *hp_dev = app_ctx->hp_dev; + struct gpio_acpi_context *gpio_ctx = app_ctx->ctx; + unsigned long flags; + int value; + enum cx7_hp_state state; + + value = gpiod_get_value(app_ctx->desc); + + if (gpio_ctx->pin == hp_dev->prsnt_pin) { + if (value) { + cx7_hp_send_uevent(hp_dev, REMOVAL_EVT); + } else { + cx7_hp_send_uevent(hp_dev, PLUG_IN_EVT); + } + return IRQ_HANDLED; + } + + spin_lock_irqsave(&hp_dev->lock, flags); + if (!hp_dev->hotplug_enabled) { + spin_unlock_irqrestore(&hp_dev->lock, flags); + return IRQ_HANDLED; + } + state = hp_dev->state; + + if (gpio_ctx->pin == hp_dev->boot_pin) { + if (value && state == STATE_PLUG_IN) { + hp_dev->state = STATE_DEV_POWER_ON; + } else if (value && state == STATE_DEV_FW_START) { + hp_dev->state = STATE_RESCAN; + } else if (!value && state == STATE_DEV_POWER_ON) { + hp_dev->state = STATE_DEV_FW_START; + } else if (!value && state == STATE_PLUG_OUT) { + hp_dev->state = STATE_DEV_POWER_OFF; + } else { + spin_unlock_irqrestore(&hp_dev->lock, flags); + return IRQ_HANDLED; + } + spin_unlock_irqrestore(&hp_dev->lock, flags); + return IRQ_WAKE_THREAD; + } + + dev_err(gpio_ctx->dev, + "Unknown GPIO pin event: pin=%d irq=%d value=%d\n", + gpio_ctx->pin, irq, value); + spin_unlock_irqrestore(&hp_dev->lock, flags); + return IRQ_HANDLED; +} + +/** + * acpi_gpio_collect_handler - ACPI resource handler to collect all GPIO resources + * @ares: ACPI resource structure + * @context: Pointer to acpi_gpio_walk_context + * + * Returns: AE_OK to continue iteration + */ +static acpi_status acpi_gpio_collect_handler(struct acpi_resource *ares, + void *context) +{ + struct acpi_gpio_walk_context *walk_ctx = context; + struct acpi_resource_gpio *agpio; + int length; + + if (ares->type != ACPI_RESOURCE_TYPE_GPIO) + return AE_OK; + + if (walk_ctx->count >= PCIE_PIN_MAX) { + dev_warn(walk_ctx->dev, + "Too many GPIO resources, truncating at %d\n", + 
PCIE_PIN_MAX); + return AE_OK; + } + + agpio = &ares->data.gpio; + + if (!agpio->pin_table || agpio->pin_table_length == 0) { + dev_warn(walk_ctx->dev, "GPIO resource has no pin table\n"); + return AE_OK; + } + + walk_ctx->gpios[walk_ctx->count].pin = agpio->pin_table[0]; + walk_ctx->gpios[walk_ctx->count].connection_type = + agpio->connection_type; + walk_ctx->gpios[walk_ctx->count].triggering = agpio->triggering; + walk_ctx->gpios[walk_ctx->count].polarity = agpio->polarity; + walk_ctx->gpios[walk_ctx->count].debounce_timeout = + agpio->debounce_timeout; + walk_ctx->gpios[walk_ctx->count].wake_capable = agpio->wake_capable; + + if (agpio->vendor_length && agpio->vendor_data) { + length = min_t(int, agpio->vendor_length, MAX_VENDOR_DATA_LEN); + memcpy(walk_ctx->gpios[walk_ctx->count].vendor_data, + agpio->vendor_data, length); + walk_ctx->gpios[walk_ctx->count].vendor_data[length] = '\0'; + } else { + walk_ctx->gpios[walk_ctx->count].vendor_data[0] = '\0'; + } + + if (agpio->resource_source.string_ptr) { + length = min_t(int, agpio->resource_source.string_length, 15); + memcpy(walk_ctx->gpios[walk_ctx->count].resource_source, + agpio->resource_source.string_ptr, length); + walk_ctx->gpios[walk_ctx->count].resource_source[length] = '\0'; + } else { + walk_ctx->gpios[walk_ctx->count].resource_source[0] = '\0'; + } + walk_ctx->gpios[walk_ctx->count].resource_source_index = + agpio->resource_source.index; + walk_ctx->count++; + return AE_OK; +} + +/** + * cx7_hp_walk_acpi_gpios - Walk ACPI _CRS to collect all GPIO resources + * @pdev: Platform device + * @walk_ctx: Context structure to fill with GPIO information + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_walk_acpi_gpios(struct platform_device *pdev, + struct acpi_gpio_walk_context *walk_ctx) +{ + struct acpi_device *adev; + acpi_status status; + + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) { + dev_err(&pdev->dev, "Failed to get ACPI companion device\n"); + return -ENODEV; + } + + memset(walk_ctx, 0, sizeof(*walk_ctx)); + walk_ctx->dev = &pdev->dev; + + status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, + acpi_gpio_collect_handler, walk_ctx); + if (ACPI_FAILURE(status)) { + dev_err(&pdev->dev, "Failed to walk ACPI GPIO resources: %s\n", + acpi_format_exception(status)); + return -EIO; + } + + dev_dbg(&pdev->dev, "Found %d GPIO resources via ACPI walk\n", + walk_ctx->count); + + if (walk_ctx->count == 0) { + dev_err(&pdev->dev, "No GPIO resources found in ACPI _CRS\n"); + return -ENODEV; + } + + return 0; +} + +/** + * acpi_gpio_lookup_handler - ACPI resource handler to look up a specific GPIO pin + * @ares: ACPI resource being processed + * @context: Pointer to acpi_gpio_parse_context + * + * Returns: AE_OK to continue iteration + */ +static acpi_status acpi_gpio_lookup_handler(struct acpi_resource *ares, + void *context) +{ + struct acpi_gpio_parse_context *parse_ctx = context; + struct gpio_acpi_context *ctx = parse_ctx->ctx; + struct cx7_hp_dev *hp_dev = parse_ctx->hp_dev; + struct acpi_resource_gpio *agpio; + int length; + + if (ares->type != ACPI_RESOURCE_TYPE_GPIO) + return AE_OK; + + agpio = &ares->data.gpio; + + if (ctx->pin != agpio->pin_table[0]) + return AE_OK; + + ctx->valid = 1; + ctx->debounce_timeout_us = agpio->debounce_timeout * 10; + ctx->wake_capable = agpio->wake_capable; + ctx->triggering = agpio->triggering; + ctx->polarity = agpio->polarity; + ctx->connection_type = agpio->connection_type; + + if (agpio->vendor_length && agpio->vendor_data && hp_dev) { + length = 
min_t(int, agpio->vendor_length, MAX_VENDOR_DATA_LEN); + memcpy(&ctx->vendor_data[0], agpio->vendor_data, length); + ctx->vendor_data[length] = '\0'; + + if (!strncmp("BOOT", ctx->vendor_data, strlen("BOOT"))) + hp_dev->boot_pin = ctx->pin; + else if (!strncmp("PRSNT", ctx->vendor_data, strlen("PRSNT"))) + hp_dev->prsnt_pin = ctx->pin; + } + + if (agpio->triggering == ACPI_EDGE_SENSITIVE) { + if (agpio->polarity == ACPI_ACTIVE_LOW) + ctx->irq_flags = IRQF_TRIGGER_FALLING; + else if (agpio->polarity == ACPI_ACTIVE_HIGH) + ctx->irq_flags = IRQF_TRIGGER_RISING; + else + ctx->irq_flags = + (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING); + } else { + if (agpio->polarity == ACPI_ACTIVE_LOW) + ctx->irq_flags = IRQF_TRIGGER_LOW; + else + ctx->irq_flags = IRQF_TRIGGER_HIGH; + } + + return AE_OK; +} + +/** + * pci_devices_present_on_domain() - Check if PCI devices exist on a domain + * @domain: PCI domain number to check + * + * Returns: true if any PCI devices are present on the specified domain, + * false otherwise. This is used as a safety check before hardware shutdown. + */ +static bool pci_devices_present_on_domain(int domain) +{ + struct pci_bus *bus; + struct pci_dev *dev; + bool has_endpoint_devices = false; + + bus = pci_find_bus(domain, 1); + if (!bus) + return false; + + list_for_each_entry(dev, &bus->devices, bus_list) { + has_endpoint_devices = true; + break; + } + + return has_endpoint_devices; +} + +static ssize_t debug_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cx7_hp_dev *hp_dev = dev_get_drvdata(dev); + + if (!hp_dev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%d\n", hp_dev->debug_state); +} + +static ssize_t debug_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cx7_hp_dev *hp_dev = dev_get_drvdata(dev); + unsigned long val, flags; + int err, i; + + if (!hp_dev || !hp_dev->pd) + return -EINVAL; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + + spin_lock_irqsave(&hp_dev->lock, flags); + if (!hp_dev->hotplug_enabled) { + spin_unlock_irqrestore(&hp_dev->lock, flags); + dev_info(dev, "Hotplug is disabled.\n"); + return -EPERM; + } + spin_unlock_irqrestore(&hp_dev->lock, flags); + + switch (val) { + case CX7_HP_DEBUG_PLUG_OUT: + /* Safety check: Verify no devices on the bus before hardware shutdown. 
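+		 * Userspace is expected to detach the endpoint devices first,
+		 * for example via the PCI sysfs "remove" attribute, before a
+		 * debug-forced plug-out is performed here.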
*/ + for (i = 0; i < hp_dev->pd->port_nums; i++) { + if (pci_devices_present_on_domain + (hp_dev->pd->ports[i].domain)) { + dev_err(dev, + "PCI devices still present, remove them first\n"); + return -EBUSY; + } + } + + spin_lock_irqsave(&hp_dev->lock, flags); + hp_dev->state = STATE_PLUG_OUT; + hp_dev->debug_state = val; + spin_unlock_irqrestore(&hp_dev->lock, flags); + remove_device(hp_dev); + return count; + + case CX7_HP_DEBUG_PLUG_IN: + for (i = 0; i < hp_dev->pd->port_nums; i++) { + if (pci_devices_present_on_domain + (hp_dev->pd->ports[i].domain)) { + dev_err(dev, + "PCI devices already present, cannot reinitialize hardware\n"); + return -EBUSY; + } + } + + spin_lock_irqsave(&hp_dev->lock, flags); + hp_dev->state = STATE_PLUG_IN; + hp_dev->debug_state = val; + spin_unlock_irqrestore(&hp_dev->lock, flags); + dev_info(dev, "Cable plugin\n"); + gpiod_set_value(hp_dev->pins[PCIE_PIN_EN].desc, 1); + return count; + + default: + return -EINVAL; + } + + return count; +} + +DEVICE_ATTR_RW(debug_state); + +static ssize_t hotplug_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cx7_hp_dev *hp_dev = dev_get_drvdata(dev); + + if (!hp_dev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%d\n", hp_dev->hotplug_enabled ? 1 : 0); +} + +static ssize_t hotplug_enabled_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cx7_hp_dev *hp_dev = dev_get_drvdata(dev); + unsigned long val; + int err; + + if (!hp_dev) + return -EINVAL; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + + hp_dev->hotplug_enabled = (val != 0); + dev_info(dev, "Hotplug %s\n", hp_dev->hotplug_enabled ? "enabled" : "disabled"); + + return count; +} + +DEVICE_ATTR_RW(hotplug_enabled); + +static struct attribute *cx7_hp_attrs[] = { + &dev_attr_debug_state.attr, + &dev_attr_hotplug_enabled.attr, + NULL +}; + +static const struct attribute_group cx7_hp_attr_group = { + .name = "pcie_hotplug", + .attrs = cx7_hp_attrs +}; + +/** + * gpio_acpi_setup - Setup GPIO ACPI context from _CRS + * @pdev: platform device + * @desc: GPIO descriptor + * @hp_dev: hotplug device + * @gpio_index: GPIO index + * + * Returns: GPIO ACPI context on success, NULL on failure + */ +static struct gpio_acpi_context *gpio_acpi_setup(struct platform_device *pdev, + struct gpio_desc *desc, + struct cx7_hp_dev *hp_dev, + int gpio_index) +{ + struct acpi_gpio_parse_context parse_ctx; + struct gpio_acpi_context *ctx; + struct acpi_device *adev; + acpi_status status; + + adev = ACPI_COMPANION(&pdev->dev); + if (!adev) { + dev_err(&pdev->dev, "Failed to get ACPI companion device\n"); + return NULL; + } + + ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->pin = + desc_to_gpio(desc) - + gpio_device_get_base(gpiod_to_gpio_device(desc)); + ctx->dev = &pdev->dev; + + parse_ctx.ctx = ctx; + parse_ctx.hp_dev = hp_dev; + + status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, + acpi_gpio_lookup_handler, &parse_ctx); + if (ACPI_FAILURE(status)) { + devm_kfree(&pdev->dev, ctx); + return NULL; + } + + if (ctx->valid) { + if (gpio_index == PCIE_PIN_BOOT && hp_dev->boot_pin == -1) { + hp_dev->boot_pin = ctx->pin; + } else if (gpio_index == PCIE_PIN_PRSNT + && hp_dev->prsnt_pin == -1) { + hp_dev->prsnt_pin = ctx->pin; + } + return ctx; + } + + devm_kfree(&pdev->dev, ctx); + return NULL; +} + +/** + * cx7_hp_setup_irq - Setup IRQ for GPIO + * @app_ctx: GPIO context + * + * Returns: 0 on success, negative error code on 
failure + */ +static int cx7_hp_setup_irq(struct cx7_hp_gpio_ctx *app_ctx) +{ + struct gpio_acpi_context *ctx = app_ctx->ctx; + int irq, ret; + + irq = gpiod_to_irq(app_ctx->desc); + if (irq < 0) { + dev_err(ctx->dev, "Failed to get IRQ for GPIO\n"); + return irq; + } + + if (ctx->wake_capable) + enable_irq_wake(irq); + + ret = devm_request_threaded_irq(ctx->dev, irq, + hotplug_irq_handler, cx7_hp_work, + ctx->irq_flags | IRQF_ONESHOT, + "pcie_hotplug", app_ctx); + if (ret) + dev_err(ctx->dev, "Failed to request IRQ %d: %d\n", irq, ret); + + return ret; +} + +/** + * cx7_hp_put_gpio_device - Release GPIO device reference + * @data: GPIO device pointer + */ +static void cx7_hp_put_gpio_device(void *data) +{ + struct gpio_device *gdev = data; + + gpio_device_put(gdev); +} + +/** + * cx7_hp_discover_pcie_devices - Discover existing PCI devices on managed ports + * @pdev: platform device + * @pd: platform data + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_discover_pcie_devices(struct platform_device *pdev, + struct cx7_hp_plat_data *pd) +{ + struct pci_dev *pci_dev = NULL; + int device_count = 0; + int i; + + if (!pd->vendor_id || !pd->device_id) + return 0; + + while ((pci_dev = pci_get_device(pd->vendor_id, + pd->device_id, pci_dev)) != NULL) { + if (!pci_dev->state_saved) { + pci_dev_put(pci_dev); + return -EPROBE_DEFER; + } + + for (i = 0; i < pd->port_nums; i++) { + if (pci_domain_nr(pci_dev->bus) == pd->ports[i].domain) + break; + } + + if (i == pd->port_nums) { + dev_err(&pdev->dev, + "Device %s found on unexpected domain %d\n", + pci_name(pci_dev), pci_domain_nr(pci_dev->bus)); + pci_dev_put(pci_dev); + return -ENODEV; + } + + device_count++; + } + + if (pd->num_devices && device_count != pd->num_devices) { + dev_err(&pdev->dev, + "Required number of devices not found. 
Expected=%d Actual=%d\n", + pd->num_devices, device_count); + return -ENODEV; + } + + return 0; +} + +/** + * cx7_hp_init_pcie_data - Initialize PCIe data from _DSD and discover devices + * @pdev: platform device + * @pd: platform data to populate + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_init_pcie_data(struct platform_device *pdev, + struct cx7_hp_plat_data *pd) +{ + int ret; + + ret = cx7_hp_parse_pcie_config_dsd(pdev, pd); + if (ret) { + dev_err(&pdev->dev, + "Failed to parse PCIe configuration _DSD properties: %d\n", + ret); + return ret; + } + + if (pd->port_nums == 0 || pd->port_nums >= HP_PORT_MAX) { + dev_err(&pdev->dev, + "Invalid port count from _DSD: %d (must be 1-%d)\n", + pd->port_nums, HP_PORT_MAX - 1); + return -EINVAL; + } + + ret = cx7_hp_discover_pcie_devices(pdev, pd); + if (ret) { + dev_dbg(&pdev->dev, "Device discovery failed: %d\n", ret); + return ret; + } + + return 0; +} + +/** + * cx7_hp_enumerate_gpios - Enumerate GPIOs from ACPI + * @pdev: Platform device + * @hp_dev: Hotplug device structure + * + * Returns: Number of GPIOs found, or negative error code + */ +static int cx7_hp_enumerate_gpios(struct platform_device *pdev, + struct cx7_hp_dev *hp_dev) +{ + struct acpi_gpio_walk_context walk_ctx; + struct fwnode_handle *gpio_fwnode = NULL; + struct acpi_device *gpio_adev = NULL; + acpi_handle gpio_handle; + acpi_status status; + int ret, i; + + ret = cx7_hp_walk_acpi_gpios(pdev, &walk_ctx); + if (ret) { + dev_err(&pdev->dev, "Failed to walk ACPI GPIO resources: %d\n", + ret); + return ret; + } + + if (walk_ctx.count < CX7_HP_MIN_GPIO_COUNT) { + dev_err(&pdev->dev, + "Insufficient GPIOs from ACPI: required at least %d, got %d\n", + CX7_HP_MIN_GPIO_COUNT, walk_ctx.count); + return -ENODEV; + } + + /* Find GPIO device using resource_source from first GPIO */ + if (walk_ctx.count == 0 || walk_ctx.gpios[0].resource_source[0] == '\0') { + dev_err(&pdev->dev, + "No resource_source in ACPI GPIO resources\n"); + return -ENODEV; + } + + status = + acpi_get_handle(NULL, walk_ctx.gpios[0].resource_source, + &gpio_handle); + if (ACPI_FAILURE(status)) { + dev_err(&pdev->dev, + "Failed to get ACPI handle for GPIO controller %s\n", + walk_ctx.gpios[0].resource_source); + return -ENODEV; + } + + gpio_adev = acpi_fetch_acpi_dev(gpio_handle); + if (!gpio_adev) { + dev_err(&pdev->dev, + "Failed to get ACPI device for GPIO controller %s\n", + walk_ctx.gpios[0].resource_source); + return -ENODEV; + } + + gpio_fwnode = acpi_fwnode_handle(gpio_adev); + hp_dev->gdev = gpio_device_find_by_fwnode(gpio_fwnode); + if (!hp_dev->gdev) { + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "GPIO controller not available\n"); + } + + /* Successfully found GPIO device - manage reference */ + ret = devm_add_action_or_reset(&pdev->dev, cx7_hp_put_gpio_device, + hp_dev->gdev); + if (ret) { + gpio_device_put(hp_dev->gdev); + hp_dev->gdev = NULL; + dev_err(&pdev->dev, "Failed to register GPIO device cleanup\n"); + return ret; + } + + hp_dev->gpio_count = walk_ctx.count; + + hp_dev->pins = devm_kzalloc(&pdev->dev, + sizeof(struct cx7_hp_gpio_ctx) * + hp_dev->gpio_count, GFP_KERNEL); + if (!hp_dev->pins) { + dev_err(&pdev->dev, "Failed to allocate memory for GPIOs\n"); + return -ENOMEM; + } + + for (i = 0; i < hp_dev->gpio_count; i++) { + struct cx7_hp_gpio_ctx *app_ctx = &hp_dev->pins[i]; + + app_ctx->desc = + gpio_device_get_desc(hp_dev->gdev, walk_ctx.gpios[i].pin); + if (IS_ERR(app_ctx->desc)) { + dev_err(&pdev->dev, + "Failed to get GPIO descriptor for ACPI 
pin %u (index %d): %ld\n", + walk_ctx.gpios[i].pin, i, + PTR_ERR(app_ctx->desc)); + return PTR_ERR(app_ctx->desc); + } + + app_ctx->hp_dev = hp_dev; + } + + return hp_dev->gpio_count; +} + +/** + * cx7_hp_pci_notifier - PCI bus notifier to configure MPS for CX7 devices + * @nb: notifier block + * @action: bus notification action + * @data: pointer to device being added/removed + * + * Returns: NOTIFY_OK on success, NOTIFY_DONE if not a CX7 device + */ +static int cx7_hp_pci_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct cx7_hp_dev *hp_dev; + unsigned long flags; + + if (action != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + hp_dev = container_of(nb, struct cx7_hp_dev, pci_notifier); + if (!hp_dev || !hp_dev->pd) + return NOTIFY_DONE; + + spin_lock_irqsave(&hp_dev->lock, flags); + if (!hp_dev->hotplug_enabled) { + spin_unlock_irqrestore(&hp_dev->lock, flags); + return NOTIFY_DONE; + } + spin_unlock_irqrestore(&hp_dev->lock, flags); + + if (!pdev || !hp_dev->pd->vendor_id || !hp_dev->pd->device_id) + return NOTIFY_DONE; + + if (pdev->vendor != hp_dev->pd->vendor_id || + pdev->device != hp_dev->pd->device_id) + return NOTIFY_DONE; + + if (pdev->bus) + pcie_bus_configure_settings(pdev->bus); + + return NOTIFY_OK; +} + +/** + * cx7_hp_probe - Platform device probe function + * @pdev: platform device + * + * Initializes the PCIe hotplug driver, parses ACPI resources, and sets up + * GPIO interrupts and sysfs interface. + * + * Returns: 0 on success, negative error code on failure + */ +static int cx7_hp_probe(struct platform_device *pdev) +{ + struct cx7_hp_plat_data *pd; + struct cx7_hp_gpio_ctx *app_ctx; + struct cx7_hp_dev *hp_dev; + int ret, i; + + pd = devm_kzalloc(&pdev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) { + dev_err(&pdev->dev, + "Failed to allocate memory for platform data\n"); + return -ENOMEM; + } + + ret = cx7_hp_init_pcie_data(pdev, pd); + if (ret) + return ret; + + hp_dev = devm_kzalloc(&pdev->dev, sizeof(*hp_dev), GFP_KERNEL); + if (!hp_dev) { + dev_err(&pdev->dev, + "Failed to allocate memory for hotplug device\n"); + return -ENOMEM; + } + + hp_dev->pdev = pdev; + hp_dev->pd = pd; + hp_dev->state = STATE_READY; + hp_dev->boot_pin = -1; + hp_dev->prsnt_pin = -1; + hp_dev->hotplug_enabled = false; + spin_lock_init(&hp_dev->lock); + + for (i = 0; i < HP_PORT_MAX; i++) + hp_dev->cached_root_ports[i] = NULL; + + ret = cx7_hp_enumerate_gpios(pdev, hp_dev); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to enumerate GPIOs from ACPI: %d\n", + ret); + return ret; + } + + for (i = 0; i < hp_dev->gpio_count; i++) { + app_ctx = &hp_dev->pins[i]; + + app_ctx->ctx = gpio_acpi_setup(pdev, app_ctx->desc, hp_dev, i); + if (!app_ctx->ctx) { + dev_err(&pdev->dev, "Failed to setup GPIO %d\n", i); + return -ENODEV; + } + + gpiod_set_debounce(app_ctx->desc, + app_ctx->ctx->debounce_timeout_us); + + if (app_ctx->ctx->connection_type == + ACPI_RESOURCE_GPIO_TYPE_INT) { + ret = cx7_hp_setup_irq(app_ctx); + if (ret) { + dev_err(&pdev->dev, + "Failed to setup IRQ for GPIO %d\n", i); + return ret; + } + } + } + + platform_set_drvdata(pdev, hp_dev); + + ret = cx7_hp_pinctrl_init(hp_dev); + if (ret) { + dev_err(&pdev->dev, "Pinmux init failed, ret: %d\n", ret); + return ret; + } + + ret = sysfs_create_group(&pdev->dev.kobj, &cx7_hp_attr_group); + if (ret) { + dev_err(&pdev->dev, "Sysfs creation failed: %d\n", ret); + goto pinctrl_remove; + } + + cx7_hp_rp_bus_protect(hp_dev, 0, 
BUS_PROTECT_INIT); + + hp_dev->pci_notifier.notifier_call = cx7_hp_pci_notifier; + ret = bus_register_notifier(&pci_bus_type, &hp_dev->pci_notifier); + if (ret) { + dev_err(&pdev->dev, "Failed to register PCI bus notifier: %d\n", + ret); + goto sysfs_remove; + } + + if (gpiod_get_value(hp_dev->pins[PCIE_PIN_PRSNT].desc)) { + hp_dev->debug_state = CX7_HP_DEBUG_PLUG_OUT; + cx7_hp_send_uevent(hp_dev, REMOVAL_EVT); + } else { + hp_dev->debug_state = CX7_HP_DEBUG_PLUG_IN; + cx7_hp_send_uevent(hp_dev, PLUG_IN_EVT); + } + + dev_info(&pdev->dev, "PCIe hotplug driver initialized successfully\n"); + return 0; + +sysfs_remove: + sysfs_remove_group(&pdev->dev.kobj, &cx7_hp_attr_group); +pinctrl_remove: + cx7_hp_pinctrl_remove(hp_dev); + return ret; +} + +/** + * cx7_hp_remove - Platform device remove function + * @pdev: platform device + * + * Cleans up GPIO pins, pinctrl, sysfs interface, and bus protection. + */ +static void cx7_hp_remove(struct platform_device *pdev) +{ + struct cx7_hp_dev *hp_dev = platform_get_drvdata(pdev); + int i; + + if (!hp_dev) + return; + + sysfs_remove_group(&pdev->dev.kobj, &cx7_hp_attr_group); + + bus_unregister_notifier(&pci_bus_type, &hp_dev->pci_notifier); + + cx7_hp_rp_bus_protect(hp_dev, 0, BUS_PROTECT_CLEANUP); + + cx7_hp_pinctrl_remove(hp_dev); + + for (i = 0; i < hp_dev->pd->port_nums; i++) { + if (hp_dev->cached_root_ports[i]) + pci_dev_put(hp_dev->cached_root_ports[i]); + } + + platform_set_drvdata(pdev, NULL); +} + +static const struct acpi_device_id cx7_hp_acpi_match[] = { + {"MTKP0001", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, cx7_hp_acpi_match); + +static struct platform_driver cx7_hp_driver = { + .probe = cx7_hp_probe, + .remove = cx7_hp_remove, + .driver = { + .name = "cx7-pcie-hotplug", + .acpi_match_table = ACPI_PTR(cx7_hp_acpi_match), + }, +}; + +module_platform_driver(cx7_hp_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CX7 PCIe Hotplug Driver for NVIDIA DGX Systems"); diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig new file mode 100644 index 0000000000000..b6d15ca5b6495 --- /dev/null +++ b/drivers/resctrl/Kconfig @@ -0,0 +1,30 @@ +menuconfig ARM64_MPAM_DRIVER + bool "MPAM driver" + depends on ARM64 && ARM64_MPAM + select ACPI_MPAM if ACPI + help + MPAM driver for System IP, e,g. caches and memory controllers. + +if ARM64_MPAM_DRIVER +config ARM64_MPAM_DRIVER_DEBUG + bool "Enable debug messages from the MPAM driver" + help + Say yes here to enable debug messages from the MPAM driver. + +config MPAM_KUNIT_TEST + bool "KUnit tests for MPAM driver " if !KUNIT_ALL_TESTS + depends on KUNIT=y + default KUNIT_ALL_TESTS + help + Enable this option to run tests in the MPAM driver. + + If unsure, say N. + +endif + +config ARM64_MPAM_RESCTRL_FS + bool + default y if ARM64_MPAM_DRIVER && RESCTRL_FS + select RESCTRL_RMID_DEPENDS_ON_CLOSID + select RESCTRL_ASSIGN_FIXED + select RESCTRL_IOMMU if ARM_SMMU_V3 diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile new file mode 100644 index 0000000000000..097c036724e97 --- /dev/null +++ b/drivers/resctrl/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o +mpam-y += mpam_devices.o mpam_fb.o +mpam-$(CONFIG_ARM64_MPAM_RESCTRL_FS) += mpam_resctrl.o + +ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c new file mode 100644 index 0000000000000..f0740b5d59b5b --- /dev/null +++ b/drivers/resctrl/mpam_devices.c @@ -0,0 +1,3670 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. 
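+
+/*
+ * Core of the MPAM MSC driver: MSC and RIS hardware probing, errata quirk
+ * handling, and the class/component/vmsc hierarchy described in the comment
+ * block further down this file.
+ */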
+ +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mpam_internal.h" +#include "mpam_fb.h" + +/* Values for the T241 errata workaround */ +#define T241_CHIPS_MAX 4 +#define T241_CHIP_NSLICES 12 +#define T241_SPARE_REG0_OFF 0x1b0000 +#define T241_SPARE_REG1_OFF 0x1c0000 +#define T241_CHIP_ID(phys) FIELD_GET(GENMASK_ULL(44, 43), phys) +#define T241_SHADOW_REG_OFF(sidx, pid) (0x360048 + (sidx) * 0x10000 + (pid) * 8) +#define SMCCC_SOC_ID_T241 0x036b0241 +static void __iomem *t241_scratch_regs[T241_CHIPS_MAX]; + +/* + * mpam_list_lock protects the SRCU lists when writing. Once the + * mpam_enabled key is enabled these lists are read-only, + * unless the error interrupt disables the driver. + */ +static DEFINE_MUTEX(mpam_list_lock); +static LIST_HEAD(mpam_all_msc); + +struct srcu_struct mpam_srcu; + +/* + * Number of MSCs that have been probed. Once all MSC have been probed MPAM + * can be enabled. + */ +static atomic_t mpam_num_msc; + +static int mpam_cpuhp_state; +static DEFINE_MUTEX(mpam_cpuhp_state_lock); + +/* + * The smallest common values for any CPU or MSC in the system. + * Generating traffic outside this range will result in screaming interrupts. + */ +u16 mpam_partid_max; +u8 mpam_pmg_max; +static bool partid_max_init, partid_max_published; +static u16 mpam_cmdline_partid_max; +static bool mpam_cmdline_partid_max_overridden; +static DEFINE_SPINLOCK(partid_max_lock); + +/* + * mpam is enabled once all devices have been probed from CPU online callbacks, + * scheduled via this work_struct. If access to an MSC depends on a CPU that + * was not brought online at boot, this can happen surprisingly late. + */ +static DECLARE_WORK(mpam_enable_work, &mpam_enable); + +/* + * All mpam error interrupts indicate a software bug. On receipt, disable the + * driver. + */ +static DECLARE_WORK(mpam_broken_work, &mpam_disable); + +/* When mpam is disabled, the printed reason to aid debugging */ +static char *mpam_disable_reason; + +static struct dentry *mpam_debugfs; + +/* + * Whether has been setup. Used by cpuhp in preference to mpam_is_enabled() + * the disable call after an error interrupt makes mpam_is_enabled() false before + * the cpuhp callbacks are made. + * Reads/writes should hold mpam_cpuhp_state_lock, (or be cpuhp callbacks). + */ +static bool mpam_resctrl_enabled; + +/* + * An MSC is a physical container for controls and monitors, each identified by + * their RIS index. These share a base-address, interrupts and some MMIO + * registers. A vMSC is a virtual container for RIS in an MSC that control or + * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but + * not all RIS in an MSC share a vMSC. + * Components are a group of vMSC that control or monitor the same thing but + * are from different MSC, so have different base-address, interrupts etc. + * Classes are the set components of the same type. + * + * The features of a vMSC is the union of the RIS it contains. + * The features of a Class and Component are the common subset of the vMSC + * they contain. + * + * e.g. The system cache may have bandwidth controls on multiple interfaces, + * for regulating traffic from devices independently of traffic from CPUs. 
+ * If these are two RIS in one MSC, they will be treated as controlling + * different things, and will not share a vMSC/component/class. + * + * e.g. The L2 may have one MSC and two RIS, one for cache-controls another + * for bandwidth. These two RIS are members of the same vMSC. + * + * e.g. The set of RIS that make up the L2 are grouped as a component. These + * are sometimes termed slices. They should be configured the same, as if there + * were only one. + * + * e.g. The SoC probably has more than one L2, each attached to a distinct set + * of CPUs. All the L2 components are grouped as a class. + * + * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list, + * then linked via struct mpam_ris to a vmsc, component and class. + * The same MSC may exist under different class->component->vmsc paths, but the + * RIS index will be unique. + */ +LIST_HEAD(mpam_classes); + +/* List of all objects that can be free()d after synchronise_srcu() */ +static LLIST_HEAD(mpam_garbage); + +static inline void init_garbage(struct mpam_garbage *garbage) +{ + init_llist_node(&garbage->llist); +} + +/* + * Once mpam is enabled, new requestors cannot further reduce the available + * partid. Assert that the size is fixed, and new requestors will be turned + * away. + */ +static void mpam_assert_partid_sizes_fixed(void) +{ + WARN_ON_ONCE(!partid_max_published); +} + +static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) +{ + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + if (msc->iface == MPAM_IFACE_SCMI) { + u32 ret; + + mpam_fb_send_read_request(&msc->mpam_fb_chan, + msc->mpam_fb_msc_id, reg, &ret); + return ret; + } + + WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz); + + return readl_relaxed(msc->mapped_hwpage + reg); +} + +static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + return __mpam_read_reg(msc, reg); +} + +#define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg) + +static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + if (msc->iface == MPAM_IFACE_SCMI) { + mpam_fb_send_write_request(&msc->mpam_fb_chan, + msc->mpam_fb_msc_id, reg, val); + } else { + WARN_ON_ONCE(reg + sizeof(u32) >= msc->mapped_hwpage_sz); + writel_relaxed(val, msc->mapped_hwpage + reg); + } +} + +static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + __mpam_write_reg(msc, reg, val); +} +#define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val) + +static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg) +{ + mpam_mon_sel_lock_held(msc); + return __mpam_read_reg(msc, reg); +} +#define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg) + +static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + mpam_mon_sel_lock_held(msc); + __mpam_write_reg(msc, reg, val); +} +#define mpam_write_monsel_reg(msc, reg, val) _mpam_write_monsel_reg(msc, MSMON_##reg, val) + +static u64 mpam_msc_read_idr(struct mpam_msc *msc) +{ + u64 idr_high = 0, idr_low; + + lockdep_assert_held(&msc->part_sel_lock); + + idr_low = mpam_read_partsel_reg(msc, IDR); + if (FIELD_GET(MPAMF_IDR_EXT, idr_low)) + idr_high = mpam_read_partsel_reg(msc, IDR + 4); + + return (idr_high << 32) | idr_low; +} + +static void mpam_msc_clear_esr(struct 
mpam_msc *msc) +{ + u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR); + if (!esr_low) + return; + + /* + * Clearing the high/low bits of MPAMF_ESR can not be atomic. + * Clear the top half first, so that the pending error bits in the + * lower half prevent hardware from updating either half of the + * register. + */ + if (msc->has_extd_esr) + __mpam_write_reg(msc, MPAMF_ESR + 4, 0); + __mpam_write_reg(msc, MPAMF_ESR, 0); +} + +static u64 mpam_msc_read_esr(struct mpam_msc *msc) +{ + u64 esr_high = 0, esr_low; + + esr_low = __mpam_read_reg(msc, MPAMF_ESR); + if (msc->has_extd_esr) + esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4); + + return (esr_high << 32) | esr_low; +} + +static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc) +{ + lockdep_assert_held(&msc->part_sel_lock); + + mpam_write_partsel_reg(msc, PART_SEL, partsel); +} + +static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid); + + __mpam_part_sel_raw(partsel, msc); +} + +static void __mpam_intpart_sel(u8 ris_idx, u16 intpartid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, intpartid) | + MPAMCFG_PART_SEL_INTERNAL; + + __mpam_part_sel_raw(partsel, msc); +} + +int mpam_register_requestor(u16 partid_max, u8 pmg_max) +{ + guard(spinlock)(&partid_max_lock); + if (!partid_max_init) { + mpam_partid_max = partid_max; + mpam_pmg_max = pmg_max; + partid_max_init = true; + } else if (!partid_max_published) { + mpam_partid_max = min(mpam_partid_max, partid_max); + mpam_pmg_max = min(mpam_pmg_max, pmg_max); + } else { + /* New requestors can't lower the values */ + if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max) + return -EBUSY; + } + + if (mpam_cmdline_partid_max_overridden) + mpam_partid_max = min(mpam_cmdline_partid_max, mpam_partid_max); + + return 0; +} +EXPORT_SYMBOL(mpam_register_requestor); + +static struct mpam_vmsc * +mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL); + if (!vmsc) + return ERR_PTR(-ENOMEM); + init_garbage(&vmsc->garbage); + + INIT_LIST_HEAD_RCU(&vmsc->ris); + INIT_LIST_HEAD_RCU(&vmsc->comp_list); + vmsc->comp = comp; + vmsc->msc = msc; + + list_add_rcu(&vmsc->comp_list, &comp->vmsc); + + return vmsc; +} + +static struct mpam_vmsc *mpam_vmsc_find(struct mpam_component *comp, + struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + if (vmsc->msc->id == msc->id) + return vmsc; + } + + return mpam_vmsc_alloc(comp, msc); +} + +static struct mpam_component * +mpam_component_alloc(struct mpam_class *class, u32 id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + comp = kzalloc(sizeof(*comp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); + init_garbage(&comp->garbage); + + comp->comp_id = id; + INIT_LIST_HEAD_RCU(&comp->vmsc); + /* affinity is updated when ris are added */ + INIT_LIST_HEAD_RCU(&comp->class_list); + comp->class = class; + + list_add_rcu(&comp->class_list, &class->components); + + return comp; +} + +static struct mpam_component * +mpam_component_find(struct mpam_class *class, u32 id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(comp, &class->components, class_list) 
{ + if (comp->comp_id == id) + return comp; + } + + return mpam_component_alloc(class, id); +} + +static struct mpam_class * +mpam_class_alloc(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + class = kzalloc(sizeof(*class), GFP_KERNEL); + if (!class) + return ERR_PTR(-ENOMEM); + init_garbage(&class->garbage); + + INIT_LIST_HEAD_RCU(&class->components); + /* affinity is updated when ris are added */ + class->level = level_idx; + class->type = type; + INIT_LIST_HEAD_RCU(&class->classes_list); + ida_init(&class->ida_csu_mon); + ida_init(&class->ida_mbwu_mon); + + list_add_rcu(&class->classes_list, &mpam_classes); + + return class; +} + +static struct mpam_class * +mpam_class_find(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + if (class->type == type && class->level == level_idx) + return class; + } + + return mpam_class_alloc(level_idx, type); +} + +#define add_to_garbage(x) \ +do { \ + __typeof__(x) _x = (x); \ + _x->garbage.to_free = _x; \ + llist_add(&_x->garbage.llist, &mpam_garbage); \ +} while (0) + +static void mpam_class_destroy(struct mpam_class *class) +{ + lockdep_assert_held(&mpam_list_lock); + + debugfs_remove_recursive(class->debugfs); + class->debugfs = NULL; + list_del_rcu(&class->classes_list); + add_to_garbage(class); +} + +static void __destroy_component_cfg(struct mpam_component *comp); + +static void mpam_comp_destroy(struct mpam_component *comp) +{ + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + __destroy_component_cfg(comp); + + debugfs_remove_recursive(comp->debugfs); + comp->debugfs = NULL; + list_del_rcu(&comp->class_list); + add_to_garbage(comp); + + if (list_empty(&class->components)) + mpam_class_destroy(class); +} + +static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc) +{ + struct mpam_component *comp = vmsc->comp; + + lockdep_assert_held(&mpam_list_lock); + + debugfs_remove_recursive(vmsc->debugfs); + vmsc->debugfs = NULL; + list_del_rcu(&vmsc->comp_list); + add_to_garbage(vmsc); + + if (list_empty(&comp->vmsc)) + mpam_comp_destroy(comp); +} + +static void mpam_ris_destroy(struct mpam_msc_ris *ris) +{ + struct mpam_vmsc *vmsc = ris->vmsc; + struct mpam_msc *msc = vmsc->msc; + struct mpam_component *comp = vmsc->comp; + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + /* + * Once a RIS has been removed from a class, it can no longer be used + * by resctrl, even though the class has yet to be removed. + */ + mpam_resctrl_teardown_class(class); + + /* + * It is assumed affinities don't overlap. If they do the class becomes + * unusable immediately. 
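+	 * (The cpumask_andnot() calls below remove this RIS's CPUs from the
+	 * component and class affinity masks; overlapping affinities would also
+	 * remove CPUs still covered by the remaining RIS.)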
+ */ + cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity); + cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity); + clear_bit(ris->ris_idx, &msc->ris_idxs); + debugfs_remove_recursive(ris->debugfs); + ris->debugfs = NULL; + list_del_rcu(&ris->vmsc_list); + list_del_rcu(&ris->msc_list); + add_to_garbage(ris); + + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); +} + +static void mpam_free_garbage(void) +{ + struct mpam_garbage *iter, *tmp; + struct llist_node *to_free = llist_del_all(&mpam_garbage); + + if (!to_free) + return; + + synchronize_srcu(&mpam_srcu); + + llist_for_each_entry_safe(iter, tmp, to_free, llist) { + if (iter->pdev) + devm_kfree(&iter->pdev->dev, iter->to_free); + else + kfree(iter->to_free); + } +} + +static DEFINE_XARRAY(mpam_pcc_channels); + +struct mpam_pcc_chan { + u32 refs; + u32 subspace_id; + struct pcc_mbox_chan *channel; + struct mbox_client pcc_cl; + + struct mpam_garbage garbage; +}; + +static struct mpam_pcc_chan *__mpam_pcc_alloc(u8 subspace_id, gfp_t gfp) +{ + struct mpam_pcc_chan *chan __free(kfree) = kzalloc(sizeof(*chan), gfp); + + lockdep_assert_held(&mpam_list_lock); + + if (!chan) + return ERR_PTR(-ENOMEM); + + chan->refs = 1; + chan->subspace_id = subspace_id; + /* + * TODO is the device important - these subspace_id can be re-used, so + * there is no one device to put here ... + */ + chan->pcc_cl.rx_callback = mpam_pcc_rx_callback; + chan->pcc_cl.tx_block = false; + chan->pcc_cl.tx_tout = 1000; /* 1s */ + chan->pcc_cl.knows_txdone = false; + + chan->channel = pcc_mbox_request_channel(&chan->pcc_cl, subspace_id); + if (IS_ERR(chan->channel)) + return ERR_CAST(chan->channel); + + init_garbage(&chan->garbage); + xa_store(&mpam_pcc_channels, subspace_id, chan, gfp); + + return_ptr(chan); +} + +static struct pcc_mbox_chan *mpam_pcc_alloc(u8 subspace_id, gfp_t gfp) +{ + struct mpam_pcc_chan *chan = __mpam_pcc_alloc(subspace_id, gfp); + return IS_ERR(chan) ? ERR_CAST(chan) : chan->channel; +} + +static struct pcc_mbox_chan *mpam_pcc_get(u8 subspace_id, bool alloc, gfp_t gfp) +{ + struct mpam_pcc_chan *chan; + + lockdep_assert_held(&mpam_list_lock); + + chan = xa_load(&mpam_pcc_channels, subspace_id); + if (chan) { + chan->refs++; + return chan->channel; + } + + if (!alloc) + return ERR_PTR(-ENOENT); + + return mpam_pcc_alloc(subspace_id, gfp); +} + +static void mpam_pcc_put(u8 subspace_id) +{ + struct mpam_pcc_chan *chan; + + lockdep_assert_held(&mpam_list_lock); + + chan = xa_load(&mpam_pcc_channels, subspace_id); + if (!chan) + return; + + chan->refs--; + if (!chan->refs) { + xa_erase(&mpam_pcc_channels, subspace_id); + pcc_mbox_free_channel(chan->channel); + add_to_garbage(chan); + } +} + +/* Called recursively to walk the list of caches from a particular CPU */ +static void __mpam_get_cpumask_from_cache_id(int cpu, struct device_node *cache_node, + unsigned long cache_id, + u32 cache_level, + cpumask_t *affinity) +{ + int err; + u32 iter_level; + unsigned long iter_cache_id; + struct device_node *iter_node __free(device_node) = of_find_next_cache_node(cache_node); + + if (!iter_node) + return; + + err = of_property_read_u32(iter_node, "cache-level", &iter_level); + if (err) + return; + + /* + * get_cpu_cacheinfo_id() isn't ready until sometime + * during device_initcall(). Use cache_of_calculate_id(). 
+ */ + iter_cache_id = cache_of_calculate_id(iter_node); + if (iter_cache_id == ~0UL) + return; + + if (iter_level == cache_level && iter_cache_id == cache_id) + cpumask_set_cpu(cpu, affinity); + + if (iter_level < cache_level) + __mpam_get_cpumask_from_cache_id(cpu, iter_node, cache_id, + cache_level, affinity); +} + +/* + * The cacheinfo structures are only populated when CPUs are online. + * This helper walks the device tree to include offline CPUs too. + */ +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity) +{ + int cpu; + + if (!acpi_disabled) + return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity); + + for_each_possible_cpu(cpu) { + struct device_node *cpu_node __free(device_node) = of_get_cpu_node(cpu, NULL); + if (!cpu_node) { + pr_err("Failed to find cpu%d device node\n", cpu); + return -ENOENT; + } + + __mpam_get_cpumask_from_cache_id(cpu, cpu_node, cache_id, + cache_level, affinity); + continue; + } + + return 0; +} + +static int get_cpumask_from_cache(struct device_node *cache, + cpumask_t *affinity) +{ + int err; + u32 cache_level; + unsigned long cache_id; + + err = of_property_read_u32(cache, "cache-level", &cache_level); + if (err) { + pr_err("Failed to read cache-level from cache node\n"); + return -ENOENT; + } + + cache_id = cache_of_calculate_id(cache); + if (cache_id == ~0UL) { + pr_err("Failed to calculate cache-id from cache node\n"); + return -ENOENT; + } + + return mpam_get_cpumask_from_cache_id(cache_id, cache_level, affinity); +} + +/* + * cpumask_of_node() only knows about online CPUs. This can't tell us whether + * a class is represented on all possible CPUs. + */ +static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (node_id == cpu_to_node(cpu)) + cpumask_set_cpu(cpu, affinity); + } +} + +static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, + enum mpam_class_types type, + struct mpam_class *class, + struct mpam_component *comp) +{ + int err; + + switch (type) { + case MPAM_CLASS_CACHE: + err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level, + affinity); + if (err) + return err; + + if (cpumask_empty(affinity)) + dev_warn_once(&msc->pdev->dev, + "no CPUs associated with cache node\n"); + + break; + case MPAM_CLASS_MEMORY: + get_cpumask_from_node_id(comp->comp_id, affinity); + /* affinity may be empty for CPU-less memory nodes */ + break; + case MPAM_CLASS_UNKNOWN: + return 0; + } + + cpumask_and(affinity, affinity, &msc->accessibility); + + return 0; +} + +static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, + u32 component_id) +{ + int err; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class; + struct mpam_component *comp; + struct platform_device *pdev = msc->pdev; + + lockdep_assert_held(&mpam_list_lock); + + if (ris_idx > MPAM_MSC_MAX_NUM_RIS) + return -EINVAL; + + if (test_and_set_bit(ris_idx, &msc->ris_idxs)) + return -EBUSY; + + ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL); + if (!ris) + return -ENOMEM; + init_garbage(&ris->garbage); + ris->garbage.pdev = pdev; + + class = mpam_class_find(class_id, type); + if (IS_ERR(class)) + return PTR_ERR(class); + + comp = mpam_component_find(class, component_id); + if (IS_ERR(comp)) { + if (list_empty(&class->components)) + mpam_class_destroy(class); + return PTR_ERR(comp); + } + + vmsc = mpam_vmsc_find(comp, msc); + if (IS_ERR(vmsc)) { + if 
(list_empty(&comp->vmsc)) + mpam_comp_destroy(comp); + return PTR_ERR(vmsc); + } + + err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp); + if (err) { + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); + return err; + } + + ris->ris_idx = ris_idx; + INIT_LIST_HEAD_RCU(&ris->msc_list); + INIT_LIST_HEAD_RCU(&ris->vmsc_list); + ris->vmsc = vmsc; + + cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity); + cpumask_or(&class->affinity, &class->affinity, &ris->affinity); + list_add_rcu(&ris->vmsc_list, &vmsc->ris); + list_add_rcu(&ris->msc_list, &msc->ris); + + return 0; +} + +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, u32 component_id) +{ + int err; + + if (mpam_force_unknown_msc_test(msc)) + type = MPAM_CLASS_UNKNOWN; + + mutex_lock(&mpam_list_lock); + err = mpam_ris_create_locked(msc, ris_idx, type, class_id, + component_id); + mutex_unlock(&mpam_list_lock); + if (err) + mpam_free_garbage(); + + return err; +} + +static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, + u8 ris_idx) +{ + int err; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + + if (!test_bit(ris_idx, &msc->ris_idxs)) { + err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN, + 0, 0); + if (err) + return ERR_PTR(err); + } + + list_for_each_entry(ris, &msc->ris, msc_list) { + if (ris->ris_idx == ris_idx) { + return ris; + } + } + + return ERR_PTR(-ENOENT); +} + +static void mpam_enable_quirk_nvidia_t241(struct mpam_msc *msc, + const struct mpam_quirk *quirk) +{ + s32 soc_id = arm_smccc_get_soc_id_version(); + struct resource *r; + phys_addr_t phys; + + /* + * A mapping to a device other than the MSC is needed, check + * SOC_ID is NVIDIA T241 chip (036b:0241) + */ + if (soc_id < 0 || soc_id != SMCCC_SOC_ID_T241) + return; + + r = platform_get_resource(msc->pdev, IORESOURCE_MEM, 0); + if (!r) + return; + + /* Find the internal registers base addr from the CHIP ID */ + msc->t241_id = T241_CHIP_ID(r->start); + phys = FIELD_PREP(GENMASK_ULL(45, 44), msc->t241_id) | 0x19000000ULL; + + t241_scratch_regs[msc->t241_id] = ioremap(phys, SZ_8M); + if (WARN_ON_ONCE(!t241_scratch_regs[msc->t241_id])) + return; + + mpam_set_quirk(quirk->workaround, msc); + pr_info_once("Enabled workaround for NVIDIA T241 erratum T241-MPAM-1\n"); +} + +static const struct mpam_quirk mpam_quirks[] = { + { + /* NVIDIA t241 erratum T241-MPAM-1 */ + .init = mpam_enable_quirk_nvidia_t241, + .iidr = IIDR_PROD(0x241) | IIDR_VAR(0) | IIDR_REV(0) | IIDR_IMP(0x36b), + .iidr_mask = IIDR_MATCH_ONE, + .workaround = T241_SCRUB_SHADOW_REGS, + }, + { + /* NVIDIA t241 erratum T241-MPAM-4 */ + .iidr = IIDR_PROD(0x241) | IIDR_VAR(0) | IIDR_REV(0) | IIDR_IMP(0x36b), + .iidr_mask = IIDR_MATCH_ONE, + .workaround = T241_FORCE_MBW_MIN_TO_ONE, + }, + { + /* NVIDIA t241 erratum T241-MPAM-6 */ + .iidr = IIDR_PROD(0x241) | IIDR_VAR(0) | IIDR_REV(0) | IIDR_IMP(0x36b), + .iidr_mask = IIDR_MATCH_ONE, + .workaround = T241_MBW_COUNTER_SCALE_64, + }, + { + /* ARM CMN-650 CSU erratum 3642720 */ + .iidr = IIDR_PROD(0) | IIDR_VAR(0) | IIDR_REV(0) | IIDR_IMP(0x43b), + .iidr_mask = IIDR_MATCH_ONE, + .workaround = IGNORE_CSU_NRDY, + }, + { NULL }, /* Sentinel */ +}; + +static void mpam_enable_quirks(struct mpam_msc *msc) +{ + const struct mpam_quirk *quirk; + + for (quirk = &mpam_quirks[0]; quirk->iidr_mask; quirk++) { + if (quirk->iidr != (msc->iidr & quirk->iidr_mask)) + continue; + + if (quirk->init) + quirk->init(msc, quirk); + else + 
mpam_set_quirk(quirk->workaround, msc); + } +} + +/* + * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour + * of NRDY, software can use this bit for any purpose" - so hardware might not + * implement this - but it isn't RES0. + * + * Try and see what values stick in this bit. If we can write either value, + * its probably not implemented by hardware. + */ +static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) +{ + u32 now; + u64 mon_sel; + bool can_set, can_clear; + struct mpam_msc *msc = ris->vmsc->msc; + + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) + return false; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + _mpam_write_monsel_reg(msc, mon_reg, mon_sel); + + _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_set = now & MSMON___NRDY; + + _mpam_write_monsel_reg(msc, mon_reg, 0); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_clear = !(now & MSMON___NRDY); + mpam_mon_sel_inner_unlock(msc); + + return (!can_set || !can_clear); +} + +#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \ + _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg) + +static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) +{ + int err; + struct mpam_msc *msc = ris->vmsc->msc; + struct device *dev = &msc->pdev->dev; + struct mpam_props *props = &ris->props; + struct mpam_class *class = ris->vmsc->comp->class; + + lockdep_assert_held(&msc->probe_lock); + lockdep_assert_held(&msc->part_sel_lock); + + /* Cache Capacity Partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) { + ris->ccap_idr = mpam_read_partsel_reg(msc, CCAP_IDR); + + props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ris->ccap_idr); + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ris->ccap_idr)) + mpam_set_feature(mpam_feat_cmax_softlim, props); + + if (props->cmax_wd && + !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ris->ccap_idr)) + mpam_set_feature(mpam_feat_cmax_cmax, props); + + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ris->ccap_idr)) + mpam_set_feature(mpam_feat_cmax_cmin, props); + + props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ris->ccap_idr); + if (props->cassoc_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ris->ccap_idr)) + mpam_set_feature(mpam_feat_cmax_cassoc, props); + } + + /* Cache Portion partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { + ris->cpor_idr = mpam_read_partsel_reg(msc, CPOR_IDR); + + props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, ris->cpor_idr); + if (props->cpbm_wd) + mpam_set_feature(mpam_feat_cpor_part, props); + } + + /* Memory bandwidth partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) { + u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR); + + /* portion bitmap resolution */ + props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features); + if (props->mbw_pbm_bits && + FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features)) + mpam_set_feature(mpam_feat_mbw_part, props); + + props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); + + /* + * The BWA_WD field can represent 0-63, but the control fields it + * describes have a maximum of 16 bits. 
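+		 * e.g. a RIS advertising BWA_WD = 20 is clamped to 16 below, so
+		 * the MBW_MAX/MBW_MIN values generated from bwa_wd always fit
+		 * their 16-bit register fields.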
+ */ + props->bwa_wd = min(props->bwa_wd, 16); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) + mpam_set_feature(mpam_feat_mbw_max, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features)) + mpam_set_feature(mpam_feat_mbw_min, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features)) + mpam_set_feature(mpam_feat_mbw_prop, props); + } + + /* Priority partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) { + u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR); + + props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features); + if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) { + mpam_set_feature(mpam_feat_intpri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_intpri_part_0_low, props); + } + + props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features); + if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) { + mpam_set_feature(mpam_feat_dspri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_dspri_part_0_low, props); + } + } + + /* Performance Monitoring */ + if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) { + u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR); + + /* + * If the firmware max-nrdy-us property is missing, the + * CSU counters can't be used. Should we wait forever? + */ + err = device_property_read_u32(&msc->pdev->dev, + "arm,not-ready-us", + &msc->nrdy_usec); + + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) { + u32 csumonidr; + + csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR); + props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr); + if (props->num_csu_mon) { + bool hw_managed; + + mpam_set_feature(mpam_feat_msmon_csu, props); + + if (FIELD_GET(MPAMF_CSUMON_IDR_HAS_XCL, csumonidr)) + mpam_set_feature(mpam_feat_msmon_csu_xcl, props); + + /* Is NRDY hardware managed? */ + mpam_mon_sel_outer_lock(msc); + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); + mpam_mon_sel_outer_unlock(msc); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props); + } + + /* + * Accept the missing firmware property if NRDY appears + * un-implemented. + */ + if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props)) + dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); + } + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { + bool has_long, hw_managed; + u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); + + props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); + if (props->num_mbwu_mon) { + mpam_set_feature(mpam_feat_msmon_mbwu, props); + mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props); + } + + if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); + + has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumon_idr); + if (props->num_mbwu_mon && has_long) { + mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props); + if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props); + } + + /* Is NRDY hardware managed? 
*/ + mpam_mon_sel_outer_lock(msc); + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + mpam_mon_sel_outer_unlock(msc); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + + /* + * Don't warn about any missing firmware property for + * MBWU NRDY - it doesn't make any sense! + */ + } + } + + /* + * RIS with PARTID narrowing don't have enough storage for one + * configuration per PARTID. If these are in a class we could use, + * reduce the supported partid_max to match the number of intpartid. + * If the class is unknown, just ignore it. + */ + if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) && + class->type != MPAM_CLASS_UNKNOWN) { + u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR); + u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr); + + mpam_set_feature(mpam_feat_partid_nrw, props); + msc->partid_max = min(msc->partid_max, partid_max); + } +} + +static int mpam_msc_hw_probe(struct mpam_msc *msc) +{ + u64 idr; + u16 partid_max; + u8 ris_idx, pmg_max; + struct mpam_msc_ris *ris; + struct device *dev = &msc->pdev->dev; + + lockdep_assert_held(&msc->probe_lock); + + idr = __mpam_read_reg(msc, MPAMF_AIDR); + if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) { + dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n"); + return -EIO; + } + + /* Grab an IDR value to find out how many RIS there are */ + mutex_lock(&msc->part_sel_lock); + idr = mpam_msc_read_idr(msc); + msc->iidr = mpam_read_partsel_reg(msc, IIDR); + mutex_unlock(&msc->part_sel_lock); + + mpam_enable_quirks(msc); + + msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); + + /* Use these values so partid/pmg always starts with a valid value */ + msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + + for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) { + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + idr = mpam_msc_read_idr(msc); + mutex_unlock(&msc->part_sel_lock); + + partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + msc->partid_max = min(msc->partid_max, partid_max); + msc->pmg_max = min(msc->pmg_max, pmg_max); + msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr); + + mutex_lock(&mpam_list_lock); + ris = mpam_get_or_create_ris(msc, ris_idx); + mutex_unlock(&mpam_list_lock); + if (IS_ERR(ris)) + return PTR_ERR(ris); + ris->idr = idr; + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + mpam_ris_hw_probe(ris); + mutex_unlock(&msc->part_sel_lock); + } + + /* Clear any stale errors */ + mpam_msc_clear_esr(msc); + + spin_lock(&partid_max_lock); + mpam_partid_max = min(mpam_partid_max, msc->partid_max); + mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); + spin_unlock(&partid_max_lock); + + msc->probed = true; + + return 0; +} + +struct mon_read { + struct mpam_msc_ris *ris; + struct mon_cfg *ctx; + enum mpam_device_features type; + u64 *val; + int err; + bool waited_timeout; +}; + +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris) +{ + return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) || + mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props)); +} + +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc) +{ + int retry = 3; + u32 mbwu_l_low; + u64 mbwu_l_high1, mbwu_l_high2; + + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + 
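+	/*
+	 * The 64-bit counter is read as two 32-bit halves: high word, low word,
+	 * then high word again. If the two high-word reads differ, the low word
+	 * wrapped mid-read, so retry (up to three times) before reporting the
+	 * value as not-ready.
+	 */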
mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + do { + mbwu_l_high1 = mbwu_l_high2; + mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L); + mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + + retry--; + } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0); + + if (mbwu_l_high1 == mbwu_l_high2) + return (mbwu_l_high1 << 32) | mbwu_l_low; + return MSMON___NRDY_L; +} + +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc) +{ + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + __mpam_write_reg(msc, MSMON_MBWU_L, 0); + __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0); +} + +static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mon_cfg *ctx = m->ctx; + + /* + * For CSU counters its implementation-defined what happens when not + * filtering by partid. + */ + *ctl_val = MSMON_CFG_x_CTL_MATCH_PARTID; + + *flt_val = FIELD_PREP(MSMON_CFG_x_FLT_PARTID, ctx->partid); + + if (m->ctx->match_pmg) { + *ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; + *flt_val |= FIELD_PREP(MSMON_CFG_x_FLT_PMG, ctx->pmg); + } + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val |= MSMON_CFG_CSU_CTL_TYPE_CSU; + + if (mpam_has_feature(mpam_feat_msmon_csu_xcl, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_CSU_FLT_XCL, + ctx->csu_exclude_clean); + + break; + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts); + + break; + default: + return; + } +} + +static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mpam_msc *msc = m->ris->vmsc->msc; + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); + return; + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + return; + default: + return; + } +} + +/* Remove values set by the hardware to prevent apparent mismatches. */ +static void clean_msmon_ctl_val(u32 *cur_ctl) +{ + *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS; + + if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU) + *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L; +} + +static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, + u32 flt_val) +{ + struct msmon_mbwu_state *mbwu_state; + struct mpam_msc *msc = m->ris->vmsc->msc; + + /* + * Write the ctl_val with the enable bit cleared, reset the counter, + * then enable counter. 
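+	 * The same sequence applies to both CSU and MBWU monitors below; when a
+	 * long (44 or 63 bit) MBWU counter is in use, MSMON_MBWU_L is zeroed as
+	 * well.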
+ */ + switch (m->type) { + case mpam_feat_msmon_csu: + mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val); + mpam_write_monsel_reg(msc, CSU, 0); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + break; + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + mpam_msc_zero_mbwu_l(m->ris->vmsc->msc); + fallthrough; + case mpam_feat_msmon_mbwu_31counter: + mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); + mpam_write_monsel_reg(msc, MBWU, 0); + + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + + mbwu_state = &m->ris->mbwu_state[m->ctx->mon]; + mbwu_state->prev_val = 0; + + break; + default: + return; + } +} + +static u64 mpam_msmon_overflow_val(enum mpam_device_features type) +{ + /* TODO: implement scaling counters */ + switch (type) { + case mpam_feat_msmon_mbwu_63counter: + return GENMASK_ULL(62, 0); + case mpam_feat_msmon_mbwu_44counter: + return GENMASK_ULL(43, 0); + case mpam_feat_msmon_mbwu_31counter: + return GENMASK_ULL(30, 0); + default: + return 0; + } +} + +/* Call with MSC lock held */ +static void __ris_msmon_read(void *arg) +{ + bool nrdy = false; + bool config_mismatch; + struct mon_read *m = arg; + u64 now, overflow_val = 0; + struct mon_cfg *ctx = m->ctx; + bool reset_on_next_read = false; + struct mpam_msc_ris *ris = m->ris; + struct msmon_mbwu_state *mbwu_state; + struct mpam_props *rprops = &ris->props; + struct mpam_msc *msc = m->ris->vmsc->msc; + u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; + + if (!mpam_mon_sel_inner_lock(msc)) { + m->err = -EIO; + return; + } + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + if (m->type == mpam_feat_msmon_mbwu) { + mbwu_state = &ris->mbwu_state[ctx->mon]; + if (mbwu_state) { + reset_on_next_read = mbwu_state->reset_on_next_read; + mbwu_state->reset_on_next_read = false; + } + } + + /* + * Read the existing configuration to avoid re-writing the same values. + * This saves waiting for 'nrdy' on subsequent reads. 
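+	 * (Re-programming the monitor resets the counter, which would leave
+	 * NRDY set again until the hardware has accurate data.)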
+ */ + read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); + clean_msmon_ctl_val(&cur_ctl); + gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); + config_mismatch = cur_flt != flt_val || + cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); + + if (config_mismatch || reset_on_next_read) + write_msmon_ctl_flt_vals(m, ctl_val, flt_val); + + switch (m->type) { + case mpam_feat_msmon_csu: + now = mpam_read_monsel_reg(msc, CSU); + if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + + if (mpam_has_quirk(IGNORE_CSU_NRDY, msc) && m->waited_timeout) + nrdy = false; + + break; + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + if (m->type != mpam_feat_msmon_mbwu_31counter) { + now = mpam_msc_read_mbwu_l(msc); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY_L; + + if (m->type == mpam_feat_msmon_mbwu_63counter) + now = FIELD_GET(MSMON___LWD_VALUE, now); + else + now = FIELD_GET(MSMON___L_VALUE, now); + } else { + now = mpam_read_monsel_reg(msc, MBWU); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + } + + if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc)) + now *= 64; + + if (nrdy) + break; + + mbwu_state = &ris->mbwu_state[ctx->mon]; + + /* Add any pre-overflow value to the mbwu_state->val */ + if (mbwu_state->prev_val > now) { + overflow_val = mpam_msmon_overflow_val(m->type); + if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc)) + overflow_val *= 64; + overflow_val -= mbwu_state->prev_val; + } + + mbwu_state->prev_val = now; + mbwu_state->correction += overflow_val; + + /* Include bandwidth consumed before the last hardware reset */ + now += mbwu_state->correction; + break; + default: + m->err = -EINVAL; + break; + } + mpam_mon_sel_inner_unlock(msc); + + if (nrdy) { + msc->nrdy_retry_count++; + m->err = -EBUSY; + return; + } + + *m->val += now; +} + +static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) +{ + int err, any_err = 0; + struct mpam_vmsc *vmsc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + mpam_mon_sel_outer_lock(msc); + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + arg->ris = ris; + + err = smp_call_function_any(&msc->accessibility, + __ris_msmon_read, arg, + true); + if (!err && arg->err) + err = arg->err; + + /* + * Save one error to be returned to the caller, but + * keep reading counters so that get reprogrammed. On + * platforms with NRDY this lets us wait once. 
+ */ + if (err) + any_err = err; + } + mpam_mon_sel_outer_unlock(msc); + } + + return any_err; +} + +static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops)) + return mpam_feat_msmon_mbwu_44counter; + if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops)) + return mpam_feat_msmon_mbwu_63counter; + + return mpam_feat_msmon_mbwu_31counter; +} + +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features type, u64 *val) +{ + int err; + struct mon_read arg; + u64 wait_jiffies = 0; + struct mpam_class *class = comp->class; + struct mpam_props *cprops = &class->props; + + might_sleep(); + + if (!mpam_is_enabled()) + return -EIO; + + if (!mpam_has_feature(type, cprops)) + return -EOPNOTSUPP; + + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + }; + *val = 0; + + if (type == mpam_feat_msmon_mbwu) + type = mpam_msmon_choose_counter(class); + + err = _msmon_read(comp, &arg); + if (err == -EBUSY && class->nrdy_usec) + wait_jiffies = usecs_to_jiffies(class->nrdy_usec); + + while (wait_jiffies) + wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); + + if (err == -EBUSY) { + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + .waited_timeout = true, + }; + *val = 0; + + err = _msmon_read(comp, &arg); + } + + return err; +} + +void mpam_msmon_reset_all_mbwu(struct mpam_component *comp) +{ + int idx, i; + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + if (!mpam_is_enabled()) + return; + + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_rcu(vmsc, &comp->vmsc, comp_list) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props)) + continue; + + msc = vmsc->msc; + mpam_mon_sel_outer_lock(msc); + list_for_each_entry_rcu(ris, &msc->ris, vmsc_list) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + continue; + + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) + continue; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + ris->mbwu_state[i].correction = 0; + ris->mbwu_state[i].reset_on_next_read = true; + } + mpam_mon_sel_inner_unlock(msc); + } + mpam_mon_sel_outer_unlock(msc); + } + srcu_read_unlock(&mpam_srcu, idx); +} + +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) +{ + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + if (!mpam_is_enabled()) + return; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props)) + continue; + + msc = vmsc->msc; + mpam_mon_sel_outer_lock(msc); + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + continue; + + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) + continue; + + ris->mbwu_state[ctx->mon].correction = 0; + ris->mbwu_state[ctx->mon].reset_on_next_read = true; + mpam_mon_sel_inner_unlock(msc); + } + mpam_mon_sel_outer_unlock(msc); + } +} + +static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) +{ + u32 num_words, msb; + u32 bm = ~0; + int i; + + lockdep_assert_held(&msc->part_sel_lock); + + if (wd == 0) + return; + + /* + * Write all ~0 to all but the last 32bit-word, which may + * have fewer bits... 
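+	 * e.g. for wd = 40: one full word of ~0 is written, then
+	 * msb = (40 - 1) % 32 = 7, so the final write is GENMASK(7, 0).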
+ */ + num_words = DIV_ROUND_UP(wd, 32); + for (i = 0; i < num_words - 1; i++, reg += sizeof(bm)) + __mpam_write_reg(msc, reg, bm); + + /* + * ....and then the last (maybe) partial 32bit word. When wd is a + * multiple of 32, msb should be 31 to write a full 32bit word. + */ + msb = (wd - 1) % 32; + bm = GENMASK(msb, 0); + __mpam_write_reg(msc, reg, bm); +} + +static void mpam_apply_t241_erratum(struct mpam_msc_ris *ris, u16 partid) +{ + int sidx, i, lcount = 1000; + void __iomem *regs; + u64 val0, val; + + regs = t241_scratch_regs[ris->vmsc->msc->t241_id]; + + for (i = 0; i < lcount; i++) { + /* Read the shadow register at index 0 */ + val0 = readq_relaxed(regs + T241_SHADOW_REG_OFF(0, partid)); + + /* Check if all the shadow registers have the same value */ + for (sidx = 1; sidx < T241_CHIP_NSLICES; sidx++) { + val = readq_relaxed(regs + + T241_SHADOW_REG_OFF(sidx, partid)); + if (val != val0) + break; + } + if (sidx == T241_CHIP_NSLICES) + break; + } + + if (i == lcount) + pr_warn_once("t241: inconsistent values in shadow regs"); + + /* Write a value zero to spare registers to take effect of MBW conf */ + writeq_relaxed(0, regs + T241_SPARE_REG0_OFF); + writeq_relaxed(0, regs + T241_SPARE_REG1_OFF); +} + +static void mpam_quirk_post_config_change(struct mpam_msc_ris *ris, u16 partid, + struct mpam_config *cfg) +{ + if (mpam_has_quirk(T241_SCRUB_SHADOW_REGS, ris->vmsc->msc)) + mpam_apply_t241_erratum(ris, partid); +} + +/* Called via IPI. Call while holding an SRCU reference */ +static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, + struct mpam_config *cfg) +{ + u32 pri_val = 0; + u16 cmax = MPAMCFG_CMAX_CMAX; + struct mpam_msc *msc = ris->vmsc->msc; + struct mpam_props *rprops = &ris->props; + u16 dspri = GENMASK(rprops->dspri_wd, 0); + u16 intpri = GENMASK(rprops->intpri_wd, 0); + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris->ris_idx, partid, msc); + + if (mpam_has_feature(mpam_feat_partid_nrw, rprops)) { + /* Update the intpartid mapping */ + mpam_write_partsel_reg(msc, INTPARTID, + MPAMCFG_INTPARTID_INTERNAL | partid); + + /* + * Then switch to the 'internal' partid to update the + * configuration. 
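+		 * (The INTPARTID mapping written above is 1:1: request PARTID
+		 * 'partid' maps to the internal PARTID of the same value.)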
+ */ + __mpam_intpart_sel(ris->ris_idx, partid, msc); + } + + if (mpam_has_feature(mpam_feat_cpor_part, rprops) && + mpam_has_feature(mpam_feat_cpor_part, cfg)) { + if (cfg->reset_cpbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, + rprops->cpbm_wd); + else + mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); + } + + if (mpam_has_feature(mpam_feat_mbw_part, rprops) && + mpam_has_feature(mpam_feat_mbw_part, cfg)) { + if (cfg->reset_mbw_pbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, + rprops->mbw_pbm_bits); + else + mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); + } + + if (mpam_has_feature(mpam_feat_mbw_min, rprops) && + mpam_has_feature(mpam_feat_mbw_min, cfg)) + mpam_write_partsel_reg(msc, MBW_MIN, cfg->mbw_min); + + if (mpam_has_feature(mpam_feat_mbw_max, rprops) && + mpam_has_feature(mpam_feat_mbw_max, cfg)) + mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); + + if (mpam_has_feature(mpam_feat_mbw_prop, rprops) && + mpam_has_feature(mpam_feat_mbw_prop, cfg)) + mpam_write_partsel_reg(msc, MBW_PROP, 0); + + if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) { + if (mpam_has_feature(mpam_feat_cmax_cmax, cfg)) { + u32 cmax_val = cfg->cmax; + + if (cfg->cmax_softlim) + cmax_val |= MPAMCFG_CMAX_SOFTLIM; + mpam_write_partsel_reg(msc, CMAX, cmax_val); + } else { + mpam_write_partsel_reg(msc, CMAX, cmax); + } + } + + if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) { + if (mpam_has_feature(mpam_feat_cmax_cmin, cfg)) { + mpam_write_partsel_reg(msc, CMIN, cfg->cmin); + } else { + mpam_write_partsel_reg(msc, CMIN, 0); + } + } + + if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops)) + mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC); + + if (mpam_has_feature(mpam_feat_intpri_part, rprops) || + mpam_has_feature(mpam_feat_dspri_part, rprops)) { + /* aces high? 
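+		 * Keep the all-ones value computed above when 0 is the low end
+		 * of the priority range, otherwise reset to 0.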
*/ + if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops)) + intpri = 0; + if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops)) + dspri = 0; + + if (mpam_has_feature(mpam_feat_intpri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri); + if (mpam_has_feature(mpam_feat_dspri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri); + + mpam_write_partsel_reg(msc, PRI, pri_val); + } + + mpam_quirk_post_config_change(ris, partid, cfg); + + mutex_unlock(&msc->part_sel_lock); +} + +struct reprogram_ris { + struct mpam_msc_ris *ris; + struct mpam_config *cfg; +}; + +/* Call with MSC lock held */ +static int mpam_reprogram_ris(void *_arg) +{ + u16 partid, partid_max; + struct reprogram_ris *arg = _arg; + struct mpam_msc_ris *ris = arg->ris; + struct mpam_config *cfg = arg->cfg; + + if (ris->in_reset_state) + return 0; + + spin_lock(&partid_max_lock); + partid_max = mpam_partid_max; + spin_unlock(&partid_max_lock); + for (partid = 0; partid <= partid_max; partid++) + mpam_reprogram_ris_partid(ris, partid, cfg); + + return 0; +} + +/* Call with MSC lock held */ +static int mpam_restore_mbwu_state(void *_ris) +{ + int i; + struct mon_read mwbu_arg; + struct mpam_msc_ris *ris = _ris; + struct mpam_msc *msc = ris->vmsc->msc; + struct mpam_class *class = ris->vmsc->comp->class; + + mpam_mon_sel_outer_lock(msc); + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + if (ris->mbwu_state[i].enabled) { + mwbu_arg.ris = ris; + mwbu_arg.ctx = &ris->mbwu_state[i].cfg; + mwbu_arg.type = mpam_msmon_choose_counter(class); + + __ris_msmon_read(&mwbu_arg); + } + } + + mpam_mon_sel_outer_unlock(msc); + + return 0; +} + +/* Call with MSC lock and outer mon_sel lock held */ +static int mpam_save_mbwu_state(void *arg) +{ + int i; + u64 val; + struct mon_cfg *cfg; + u32 cur_flt, cur_ctl, mon_sel; + struct mpam_msc_ris *ris = arg; + struct msmon_mbwu_state *mbwu_state; + struct mpam_msc *msc = ris->vmsc->msc; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + mbwu_state = &ris->mbwu_state[i]; + cfg = &mbwu_state->cfg; + + if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) + return -EIO; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); + + if (mpam_ris_has_mbwu_long_counter(ris)) { + val = mpam_msc_read_mbwu_l(msc); + mpam_msc_zero_mbwu_l(msc); + } else { + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); + } + + cfg->mon = i; + cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt); + cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl); + cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt); + mbwu_state->correction += val; + mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl); + mpam_mon_sel_inner_unlock(msc); + } + + return 0; +} + +static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) +{ + *reset_cfg = (struct mpam_config) { + .cpbm = ~0, + .mbw_pbm = ~0, + .mbw_max = MPAMCFG_MBW_MAX_MAX, + + .reset_cpbm = true, + .reset_mbw_pbm = true, + }; + bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST); +} + +/* + * This is not part of mpam_init_reset_cfg() as high level callers have the + * class, and low level callers a ris. 
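+ * For example, with bwa_wd = 8 the function below computes res0_bits = 8,
+ * max_hw_value = 0xff00 and min_hw_granule = 0x00ff, so mbw_min is set to
+ * 0x0100, i.e. a single hardware bandwidth granule.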
+ */ +static void mpam_wa_t241_force_mbw_min_to_one(struct mpam_config *cfg, + struct mpam_props *props) +{ + u16 max_hw_value, min_hw_granule, res0_bits; + + res0_bits = 16 - props->bwa_wd; + max_hw_value = ((1 << props->bwa_wd) - 1) << res0_bits; + min_hw_granule = ~max_hw_value; + + cfg->mbw_min = min_hw_granule + 1; +} + +/* + * Called via smp_call_on_cpu() to prevent migration, while still being + * pre-emptible. Caller must hold mpam_srcu. + */ +static int mpam_reset_ris(void *arg) +{ + struct mpam_config reset_cfg; + struct mpam_msc_ris *ris = arg; + struct reprogram_ris reprogram_arg; + struct mpam_msc *msc = ris->vmsc->msc; + + if (ris->in_reset_state) + return 0; + + mpam_init_reset_cfg(&reset_cfg); + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, msc)) + mpam_wa_t241_force_mbw_min_to_one(&reset_cfg, &ris->props); + + reprogram_arg.ris = ris; + reprogram_arg.cfg = &reset_cfg; + + mpam_reprogram_ris(&reprogram_arg); + + return 0; +} + +/* + * Get the preferred CPU for this MSC. If it is accessible from this CPU, + * this CPU is preferred. This can be preempted/migrated, it will only result + * in more work. + */ +static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc) +{ + int cpu = raw_smp_processor_id(); + + if (cpumask_test_cpu(cpu, &msc->accessibility)) + return cpu; + + return cpumask_first_and(&msc->accessibility, cpu_online_mask); +} + +static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) +{ + lockdep_assert_irqs_enabled(); + lockdep_assert_cpus_held(); + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); +} + +static void mpam_reset_msc(struct mpam_msc *msc, bool online) +{ + struct mpam_msc_ris *ris; + + mpam_mon_sel_outer_lock(msc); + list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + + /* + * Set in_reset_state when coming online. The reset state + * for non-zero partid may be lost while the CPUs are offline. + */ + ris->in_reset_state = online; + + if (mpam_is_enabled() && !online) + mpam_touch_msc(msc, &mpam_save_mbwu_state, ris); + } + mpam_mon_sel_outer_unlock(msc); +} + +static void mpam_reprogram_msc(struct mpam_msc *msc) +{ + u16 partid; + bool reset; + struct mpam_config *cfg; + struct mpam_msc_ris *ris; + + /* + * No lock for mpam_partid_max as partid_max_published has been + * set by mpam_enabled(), so the values can no longer change. 
+ */ + mpam_assert_partid_sizes_fixed(); + + list_for_each_entry_srcu(ris, &msc->ris, msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_is_enabled() && !ris->in_reset_state) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + ris->in_reset_state = true; + continue; + } + + reset = true; + for (partid = 0; partid <= mpam_partid_max; partid++) { + cfg = &ris->vmsc->comp->cfg[partid]; + if (!bitmap_empty(cfg->features, MPAM_FEATURE_LAST)) + reset = false; + + mpam_reprogram_ris_partid(ris, partid, cfg); + } + ris->in_reset_state = reset; + + if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris); + } +} + +static void _enable_percpu_irq(void *_irq) +{ + int *irq = _irq; + + enable_percpu_irq(*irq, IRQ_TYPE_NONE); +} + +static int mpam_cpu_online(unsigned int cpu) +{ + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (msc->reenable_error_ppi) + _enable_percpu_irq(&msc->reenable_error_ppi); + + if (atomic_fetch_inc(&msc->online_refs) == 0) + mpam_reprogram_msc(msc); + } + + if (mpam_resctrl_enabled) + mpam_resctrl_online_cpu(cpu); + + return 0; +} + +/* Before mpam is enabled, try to probe new MSC */ +static int mpam_discovery_cpu_online(unsigned int cpu) +{ + int err = 0; + struct mpam_msc *msc; + bool new_device_probed = false; + + if (mpam_is_enabled()) + return 0; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + mutex_lock(&msc->probe_lock); + if (!msc->probed) + err = mpam_msc_hw_probe(msc); + mutex_unlock(&msc->probe_lock); + + if (err) + break; + new_device_probed = true; + } + + if (new_device_probed && !err) + schedule_work(&mpam_enable_work); + if (err) { + mpam_disable_reason = "error during probing"; + schedule_work(&mpam_broken_work); + } + + return err; +} + +static int mpam_cpu_offline(unsigned int cpu) +{ + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (msc->reenable_error_ppi) + disable_percpu_irq(msc->reenable_error_ppi); + + if (atomic_dec_and_test(&msc->online_refs)) + mpam_reset_msc(msc, false); + } + + if (mpam_resctrl_enabled) + mpam_resctrl_offline_cpu(cpu); + + return 0; +} + +static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online), + int (*offline)(unsigned int offline), + char *name) +{ + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + + mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online, + offline); + if (mpam_cpuhp_state <= 0) { + pr_err("Failed to register cpuhp callbacks"); + mpam_cpuhp_state = 0; + } + mutex_unlock(&mpam_cpuhp_state_lock); +} + +static int __setup_ppi(struct mpam_msc *msc) +{ + int cpu; + + msc->error_dev_id = alloc_percpu(struct mpam_msc *); + if (!msc->error_dev_id) + return -ENOMEM; + + for_each_cpu(cpu, &msc->accessibility) + *per_cpu_ptr(msc->error_dev_id, cpu) = msc; + + return 0; +} + +static int mpam_msc_setup_error_irq(struct mpam_msc *msc) +{ + int irq; + + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + return 0; + + /* Allocate and initialise 
the percpu device pointer for PPI */ + if (irq_is_percpu(irq)) + return __setup_ppi(msc); + + /* sanity check: shared interrupts can be routed anywhere? */ + if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) { + pr_err_once("msc:%u is a private resource with a shared error interrupt", + msc->id); + return -EINVAL; + } + + return 0; +} + +static int mpam_dt_count_msc(void) +{ + int count = 0; + struct device_node *np; + + for_each_compatible_node(np, NULL, "arm,mpam-msc") { + if (of_device_is_available(np)) + count++; + } + + return count; +} + +static int mpam_dt_parse_resource(struct mpam_msc *msc, struct device_node *np, + u32 ris_idx) +{ + int err = 0; + u32 class_id = 0; + unsigned long component_id = 0; + struct device *dev = &msc->pdev->dev; + enum mpam_class_types type = MPAM_CLASS_UNKNOWN; + struct device_node *cache __free(device_node) = NULL; + struct device_node *memory __free(device_node) = NULL; + struct device_node *parent __free(device_node) = of_get_parent(np); + + if (of_device_is_compatible(np, "arm,mpam-cache")) { + cache = of_parse_phandle(np, "arm,mpam-device", 0); + if (!cache) { + dev_err_once(dev, "Failed to read phandle\n"); + return -EINVAL; + } + type = MPAM_CLASS_CACHE; + + } else if (of_device_is_compatible(parent, "cache")) { + cache = parent; + type = MPAM_CLASS_CACHE; + } else if (of_device_is_compatible(np, "arm,mpam-memory")) { + memory = of_parse_phandle(np, "arm,mpam-device", 0); + if (!memory) { + dev_err_once(dev, "Failed to read phandle\n"); + return -EINVAL; + } + type = MPAM_CLASS_MEMORY; + } else if (of_device_is_compatible(np, "arm,mpam-memory-controller-msc")) { + memory = parent; + type = MPAM_CLASS_MEMORY; + } else { + /* + * For now, only caches and memory controllers are + * supported. + */ + return err; + } + + /* Determine the class and component ids, based on type. */ + if (type == MPAM_CLASS_CACHE) { + err = of_property_read_u32(cache, "cache-level", &class_id); + if (err) { + dev_err_once(dev, "Failed to read cache-level\n"); + return err; + } + component_id = cache_of_calculate_id(cache); + if (component_id == ~0) { + dev_err_once(dev, "Failed to calculate cache-id\n"); + return -ENOENT; + } + } else if (type == MPAM_CLASS_MEMORY) { + err = of_node_to_nid(np); + component_id = (err == NUMA_NO_NODE) ? 0 : err; + class_id = 255; + } + + return mpam_ris_create(msc, ris_idx, type, class_id, component_id); +} + +static int mpam_dt_parse_resources(struct mpam_msc *msc, void *ignored) +{ + u64 ris_idx = 0; + int err, num_ris = 0; + struct device_node *np; + + np = msc->pdev->dev.of_node; + for_each_available_child_of_node_scoped(np, iter) { + err = of_property_read_reg(iter, 0, &ris_idx, NULL); + if (!err) { + num_ris++; + err = mpam_dt_parse_resource(msc, iter, ris_idx); + if (err) + return err; + } + } + + if (!num_ris) + err = mpam_dt_parse_resource(msc, np, 0); + + return err; +} + +/* + * An MSC can control traffic from a set of CPUs, but may only be accessible + * from a (hopefully wider) set of CPUs. The common reason for this is power + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the + * corresponding cache may also be powered off. By making accesses from + * one of those CPUs, we ensure this isn't the case. 
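+ *
+ * The accessibility mask built here is what mpam_get_msc_preferred_cpu()
+ * and mpam_touch_msc() later use to pick a CPU to cross-call onto when the
+ * MSC needs to be programmed.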
+ */ +static int update_msc_accessibility(struct mpam_msc *msc) +{ + struct device *dev = &msc->pdev->dev; + struct device_node *parent; + u32 affinity_id; + int err; + + if (!acpi_disabled) { + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", + &affinity_id); + if (err) + cpumask_copy(&msc->accessibility, cpu_possible_mask); + else + acpi_pptt_get_cpus_from_container(affinity_id, + &msc->accessibility); + + return 0; + } + + /* Where an MSC can be accessed from depends on the path to of_node. */ + parent = of_get_parent(msc->pdev->dev.of_node); + if (parent == of_root) { + cpumask_copy(&msc->accessibility, cpu_possible_mask); + err = 0; + } else { + if (of_device_is_compatible(parent, "cache")) { + err = get_cpumask_from_cache(parent, + &msc->accessibility); + } else if (of_device_is_compatible(parent, "memory")) { + cpumask_copy(&msc->accessibility, cpu_possible_mask); + err = 0; + } else { + err = -EINVAL; + dev_err_once(dev, "Cannot determine accessibility of MSC.\n"); + } + } + of_node_put(parent); + + return err; +} + +static int fw_num_msc; + +/* + * There are two ways of reaching a struct mpam_msc_ris. Via the + * class->component->vmsc->ris, or via the msc. + * When destroying the msc, the other side needs unlinking and cleaning up too. + */ +static void mpam_msc_destroy(struct mpam_msc *msc) +{ + struct platform_device *pdev = msc->pdev; + struct mpam_msc_ris *ris, *tmp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list) + mpam_ris_destroy(ris); + + list_del_rcu(&msc->all_msc_list); + platform_set_drvdata(pdev, NULL); + + debugfs_remove_recursive(msc->debugfs); + msc->debugfs = NULL; + + if (msc->iface == MPAM_IFACE_PCC) + mpam_pcc_put(msc->pcc_subspace_id); + + add_to_garbage(msc); +} + +void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg) +{ + /* TODO: wake up tasks blocked on this MSC's PCC channel */ +} + +static void mpam_msc_drv_remove(struct platform_device *pdev) +{ + struct mpam_msc *msc = platform_get_drvdata(pdev); + + if (!msc) + return; + + mutex_lock(&mpam_list_lock); + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + + mpam_free_garbage(); +} + +static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + char name[20]; + struct mpam_msc *msc; + struct of_phandle_args of_args; + struct device *dev = &pdev->dev; + + lockdep_assert_held(&mpam_list_lock); + + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); + if (!msc) + return ERR_PTR(-ENOMEM); + init_garbage(&msc->garbage); + msc->garbage.pdev = pdev; + + mutex_init(&msc->probe_lock); + mutex_init(&msc->part_sel_lock); + mutex_init(&msc->error_irq_lock); + mpam_mon_sel_lock_init(msc); + msc->id = pdev->id; + msc->pdev = pdev; + INIT_LIST_HEAD_RCU(&msc->all_msc_list); + INIT_LIST_HEAD_RCU(&msc->ris); + + err = update_msc_accessibility(msc); + if (err) + return ERR_PTR(err); + if (cpumask_empty(&msc->accessibility)) { + dev_err_once(dev, "MSC is not accessible from any CPU!"); + return ERR_PTR(-EINVAL); + } + + err = mpam_msc_setup_error_irq(msc); + if (err) + return ERR_PTR(err); + + if (!device_property_read_u32(&pdev->dev, "pcc-channel", + &msc->pcc_subspace_id)) { + msc->iface = MPAM_IFACE_PCC; + } else if (!of_parse_phandle_with_fixed_args(pdev->dev.of_node, + "mpam-fb", 1, 0, + &of_args)) { + msc->iface = MPAM_IFACE_SCMI; + } else { + msc->iface = MPAM_IFACE_MMIO; + } + + if (msc->iface == MPAM_IFACE_MMIO) { + void __iomem *io; + struct resource *msc_res; + + io = 
devm_platform_get_and_ioremap_resource(pdev, 0, + &msc_res); + if (IS_ERR(io)) { + dev_err_once(dev, "Failed to map MSC base address\n"); + return (void *)io; + } + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; + msc->mapped_hwpage = io; + } else if (msc->iface == MPAM_IFACE_PCC) { + msc->pcc_chan = mpam_pcc_get(msc->pcc_subspace_id, true, GFP_KERNEL); + if (IS_ERR(msc->pcc_chan)) { + pr_err("Failed to request MSC PCC channel\n"); + return (void *)msc->pcc_chan; + } + } else if (msc->iface == MPAM_IFACE_SCMI) { + err = mpam_fb_connect_channel(of_args.np, + &msc->mpam_fb_chan); + if (err < 0) + return ERR_PTR(err); + + if (of_args.args_count > 0) + msc->mpam_fb_msc_id = of_args.args[0]; + else + msc->mpam_fb_msc_id = 0; + } + + list_add_rcu(&msc->all_msc_list, &mpam_all_msc); + platform_set_drvdata(pdev, msc); + + snprintf(name, sizeof(name), "msc.%u", msc->id); + msc->debugfs = debugfs_create_dir(name, mpam_debugfs); + debugfs_create_x32("max_nrdy_usec", 0400, msc->debugfs, &msc->nrdy_usec); + + return msc; +} + +static int mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + struct mpam_msc *msc = NULL; + void *plat_data = pdev->dev.platform_data; + + mutex_lock(&mpam_list_lock); + msc = do_mpam_msc_drv_probe(pdev); + mutex_unlock(&mpam_list_lock); + if (!IS_ERR(msc)) { + /* Create RIS entries described by firmware */ + if (!acpi_disabled) + err = acpi_mpam_parse_resources(msc, plat_data); + else + err = mpam_dt_parse_resources(msc, plat_data); + if (err) + mpam_msc_drv_remove(pdev); + } else { + err = PTR_ERR(msc); + } + + if (!err && atomic_add_return(1, &mpam_num_msc) == fw_num_msc) + mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL, + "mpam:drv_probe"); + + return err; +} + +static const struct of_device_id mpam_of_match[] = { + { .compatible = "arm,mpam-msc", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpam_of_match); + +static struct platform_driver mpam_msc_driver = { + .driver = { + .name = "mpam_msc", + .of_match_table = of_match_ptr(mpam_of_match), + }, + .probe = mpam_msc_drv_probe, + .remove = mpam_msc_drv_remove, +}; + +/* Any of these features mean the BWA_WD field is valid. */ +static bool mpam_has_bwa_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_mbw_min, props)) + return true; + if (mpam_has_feature(mpam_feat_mbw_max, props)) + return true; + if (mpam_has_feature(mpam_feat_mbw_prop, props)) + return true; + return false; +} + +/* Any of these features mean the CMAX_WD field is valid. */ +static bool mpam_has_cmax_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_cmax_cmax, props)) + return true; + if (mpam_has_feature(mpam_feat_cmax_cmin, props)) + return true; + return false; +} + +#define MISMATCHED_HELPER(parent, child, helper, field, alias) \ + helper(parent) && \ + ((helper(child) && (parent)->field != (child)->field) || \ + (!helper(child) && !(alias))) + +#define MISMATCHED_FEAT(parent, child, feat, field, alias) \ + mpam_has_feature((feat), (parent)) && \ + ((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \ + (!mpam_has_feature((feat), (child)) && !(alias))) + +#define CAN_MERGE_FEAT(parent, child, feat, alias) \ + (alias) && !mpam_has_feature((feat), (parent)) && \ + mpam_has_feature((feat), (child)) + +/* + * Combine two props fields. + * If this is for controls that alias the same resource, it is safe to just + * copy the values over. If two aliasing controls implement the same scheme + * a safe value must be picked. 
+ * For non-aliasing controls, these control different resources, and the + * resulting safe value must be compatible with both. When merging values in + * the tree, all the aliasing resources must be handled first. + * On mismatch, parent is modified. + */ +static void __props_mismatch(struct mpam_props *parent, + struct mpam_props *child, bool alias) +{ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) { + parent->cpbm_wd = child->cpbm_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part, + cpbm_wd, alias)) { + pr_debug("cleared cpor_part\n"); + mpam_clear_feature(mpam_feat_cpor_part, parent); + parent->cpbm_wd = 0; + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) { + parent->mbw_pbm_bits = child->mbw_pbm_bits; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part, + mbw_pbm_bits, alias)) { + pr_debug("cleared mbw_part\n"); + mpam_clear_feature(mpam_feat_mbw_part, parent); + parent->mbw_pbm_bits = 0; + } + + /* bwa_wd is a count of bits, fewer bits means less precision */ + if (alias && !mpam_has_bwa_wd_feature(parent) && + mpam_has_bwa_wd_feature(child)) { + parent->bwa_wd = child->bwa_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature, + bwa_wd, alias)) { + pr_debug("took the min bwa_wd\n"); + parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); + } + + if (alias && !mpam_has_cmax_wd_feature(parent) && mpam_has_cmax_wd_feature(child)) { + parent->cmax_wd = child->cmax_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature, + cmax_wd, alias)) { + pr_debug("%s took the min cmax_wd\n", __func__); + parent->cmax_wd = min(parent->cmax_wd, child->cmax_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cmax_cassoc, alias)) { + parent->cassoc_wd = child->cassoc_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cmax_cassoc, + cassoc_wd, alias)) { + pr_debug("%s cleared cassoc_wd\n", __func__); + mpam_clear_feature(mpam_feat_cmax_cassoc, parent); + parent->cassoc_wd = 0; + } + + /* For num properties, take the minimum */ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) { + parent->num_csu_mon = child->num_csu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu, + num_csu_mon, alias)) { + pr_debug("took the min num_csu_mon\n"); + parent->num_csu_mon = min(parent->num_csu_mon, + child->num_csu_mon); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) { + parent->num_mbwu_mon = child->num_mbwu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu, + num_mbwu_mon, alias)) { + pr_debug("took the min num_mbwu_mon\n"); + parent->num_mbwu_mon = min(parent->num_mbwu_mon, + child->num_mbwu_mon); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_intpri_part, alias)) { + parent->intpri_wd = child->intpri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_intpri_part, + intpri_wd, alias)) { + pr_debug("%s took the min intpri_wd\n", __func__); + parent->intpri_wd = min(parent->intpri_wd, child->intpri_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_dspri_part, alias)) { + parent->dspri_wd = child->dspri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_dspri_part, + dspri_wd, alias)) { + pr_debug("%s took the min dspri_wd\n", __func__); + parent->dspri_wd = min(parent->dspri_wd, child->dspri_wd); + } + + /* TODO: alias support for these two */ + /* {int,ds}pri may not have differing 0-low behaviour */ + if (mpam_has_feature(mpam_feat_intpri_part, parent) && + (!mpam_has_feature(mpam_feat_intpri_part, 
child) || + mpam_has_feature(mpam_feat_intpri_part_0_low, parent) != + mpam_has_feature(mpam_feat_intpri_part_0_low, child))) { + pr_debug("%s cleared intpri_part\n", __func__); + mpam_clear_feature(mpam_feat_intpri_part, parent); + mpam_clear_feature(mpam_feat_intpri_part_0_low, parent); + } + if (mpam_has_feature(mpam_feat_dspri_part, parent) && + (!mpam_has_feature(mpam_feat_dspri_part, child) || + mpam_has_feature(mpam_feat_dspri_part_0_low, parent) != + mpam_has_feature(mpam_feat_dspri_part_0_low, child))) { + pr_debug("%s cleared dspri_part\n", __func__); + mpam_clear_feature(mpam_feat_dspri_part, parent); + mpam_clear_feature(mpam_feat_dspri_part_0_low, parent); + } + + if (alias) { + /* Merge features for aliased resources */ + bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } else { + /* Clear missing features for non aliasing */ + bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } +} + +/* + * If a vmsc doesn't match class feature/configuration, do the right thing(tm). + * For 'num' properties we can just take the minimum. + * For properties where the mismatched unused bits would make a difference, we + * nobble the class feature, as we can't configure all the resources. + * e.g. The L3 cache is composed of two resources with 13 and 17 portion + * bitmaps respectively. + * Quirks on an MSC will apply to all MSC in that class. + */ +static void +__class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc) +{ + struct mpam_props *cprops = &class->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify class */ + + dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n", + (long)cprops->features, (long)vprops->features); + + /* Merge quirks */ + class->quirks |= vmsc->msc->quirks; + + /* Take the safe value for any common features */ + __props_mismatch(cprops, vprops, false); +} + +static void +__vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris) +{ + struct mpam_props *rprops = &ris->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */ + + dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n", + (long)vprops->features, (long)rprops->features); + + /* + * Merge mismatched features - Copy any features that aren't common, + * but take the safe value for any common features. + */ + __props_mismatch(vprops, rprops, true); +} + +/* + * Copy the first component's first vMSC's properties and features to the + * class. __class_props_mismatch() will remove conflicts. + * It is not possible to have a class with no components, or a component with + * no resources. The vMSC properties have already been built. 
+ */ +static void mpam_enable_init_class_features(struct mpam_class *class) +{ + struct mpam_vmsc *vmsc; + struct mpam_component *comp; + + comp = list_first_entry(&class->components, + struct mpam_component, class_list); + vmsc = list_first_entry(&comp->vmsc, + struct mpam_vmsc, comp_list); + + class->props = vmsc->props; +} + +static void mpam_enable_merge_vmsc_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + __vmsc_props_mismatch(vmsc, ris); + class->nrdy_usec = max(class->nrdy_usec, + vmsc->msc->nrdy_usec); + } + } +} + +static void mpam_enable_merge_class_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) + __class_props_mismatch(class, vmsc); +} + +/* + * Merge all the common resource features into class. + * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features() + * as the first step so that mpam_enable_init_class_features() can initialise + * the class with a representive set of features. + * Next the mpam_enable_merge_class_features() bitwise-and's all the vmsc + * features to form the class features. + * Other features are the min/max as appropriate. + * + * To avoid walking the whole tree twice, the class->nrdy_usec property is + * updated when working with the vmsc as it is a max(), and doesn't need + * initialising first. + */ +static void mpam_enable_merge_features(struct list_head *all_classes_list) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, all_classes_list, classes_list) { + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_vmsc_features(comp); + + mpam_enable_init_class_features(class); + + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_class_features(comp); + } +} + +static char *mpam_errcode_names[16] = { + [MPAM_ERRCODE_NONE] = "No error", + [MPAM_ERRCODE_PARTID_SEL_RANGE] = "PARTID_SEL_Range", + [MPAM_ERRCODE_REQ_PARTID_RANGE] = "Req_PARTID_Range", + [MPAM_ERRCODE_MSMONCFG_ID_RANGE] = "MSMONCFG_ID_RANGE", + [MPAM_ERRCODE_REQ_PMG_RANGE] = "Req_PMG_Range", + [MPAM_ERRCODE_MONITOR_RANGE] = "Monitor_Range", + [MPAM_ERRCODE_INTPARTID_RANGE] = "intPARTID_Range", + [MPAM_ERRCODE_UNEXPECTED_INTERNAL] = "Unexpected_INTERNAL", + [MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL] = "Undefined_RIS_PART_SEL", + [MPAM_ERRCODE_RIS_NO_CONTROL] = "RIS_No_Control", + [MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL] = "Undefined_RIS_MON_SEL", + [MPAM_ERRCODE_RIS_NO_MONITOR] = "RIS_No_Monitor", + [12 ... 
15] = "Reserved" +}; + +static int mpam_enable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN); + + return 0; +} + +/* This can run in mpam_disable(), and the interrupt handler on the same CPU */ +static int mpam_disable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, 0); + + return 0; +} + +static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) +{ + u64 reg; + u16 partid; + u8 errcode, pmg, ris; + + if (WARN_ON_ONCE(!msc) || + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), + &msc->accessibility))) + return IRQ_NONE; + + reg = mpam_msc_read_esr(msc); + + errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg); + if (!errcode) + return IRQ_NONE; + + /* Clear level triggered irq */ + mpam_msc_clear_esr(msc); + + partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg); + pmg = FIELD_GET(MPAMF_ESR_PMG, reg); + ris = FIELD_GET(MPAMF_ESR_RIS, reg); + + pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", + msc->id, mpam_errcode_names[errcode], partid, pmg, + ris); + + /* Disable this interrupt. */ + mpam_disable_msc_ecr(msc); + + /* Are we racing with the thread disabling MPAM? */ + if (!mpam_is_enabled()) + return IRQ_HANDLED; + + /* + * Schedule the teardown work. Don't use a threaded IRQ as we can't + * unregister the interrupt from the threaded part of the handler. + */ + mpam_disable_reason = "hardware error interrupt"; + schedule_work(&mpam_broken_work); + + return IRQ_HANDLED; +} + +static irqreturn_t mpam_ppi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = *(struct mpam_msc **)dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static irqreturn_t mpam_spi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static int mpam_register_irqs(void) +{ + int err, irq; + struct mpam_msc *msc; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + /* The MPAM spec says the interrupt can be SPI, PPI or LPI */ + /* We anticipate sharing the interrupt with other MSCs */ + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, &mpam_ppi_handler, + "mpam:msc:error", + msc->error_dev_id); + if (err) + return err; + + msc->reenable_error_ppi = irq; + smp_call_function_many(&msc->accessibility, + &_enable_percpu_irq, &irq, + true); + } else { + err = devm_request_irq(&msc->pdev->dev,irq, + &mpam_spi_handler, IRQF_SHARED, + "mpam:msc:error", msc); + if (err) + return err; + } + + mutex_lock(&msc->error_irq_lock); + msc->error_irq_req = true; + mpam_touch_msc(msc, mpam_enable_msc_ecr, msc); + msc->error_irq_hw_enabled = true; + mutex_unlock(&msc->error_irq_lock); + } + + return 0; +} + +static void mpam_unregister_irqs(void) +{ + int irq; + struct mpam_msc *msc; + + guard(cpus_read_lock)(); + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + mutex_lock(&msc->error_irq_lock); + if (msc->error_irq_hw_enabled) { + mpam_touch_msc(msc, mpam_disable_msc_ecr, msc); + msc->error_irq_hw_enabled = false; + } + + if (msc->error_irq_req) { + if (irq_is_percpu(irq)) { + msc->reenable_error_ppi = 0; + free_percpu_irq(irq, msc->error_dev_id); + } else { + 
devm_free_irq(&msc->pdev->dev, irq, msc); + } + msc->error_irq_req = false; + } + mutex_unlock(&msc->error_irq_lock); + } +} + +static void __destroy_component_cfg(struct mpam_component *comp) +{ + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + + add_to_garbage(comp->cfg); + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + msc = vmsc->msc; + + mpam_mon_sel_outer_lock(msc); + if (mpam_mon_sel_inner_lock(msc)) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) + add_to_garbage(ris->mbwu_state); + mpam_mon_sel_inner_unlock(msc); + } + mpam_mon_sel_outer_unlock(msc); + } +} + +static void mpam_reset_component_cfg(struct mpam_component *comp) +{ + int i; + struct mpam_class *class = comp->class; + + mpam_assert_partid_sizes_fixed(); + + if (!comp->cfg) + return; + + for (i = 0; i < mpam_partid_max + 1; i++) { + mpam_init_reset_cfg(&comp->cfg[i]); + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class)) + mpam_wa_t241_force_mbw_min_to_one(&comp->cfg[i], + &class->props); + } +} + +static int __allocate_component_cfg(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + + mpam_assert_partid_sizes_fixed(); + + if (comp->cfg) + return 0; + + comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL); + if (!comp->cfg) + return -ENOMEM; + + /* + * The array is free()d in one go, so only cfg[0]'s struture needs + * to be initialised. + */ + init_garbage(&comp->cfg[0].garbage); + + mpam_reset_component_cfg(comp); + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + int err = 0; + struct mpam_msc *msc; + struct mpam_msc_ris *ris; + struct msmon_mbwu_state *mbwu_state; + + if (!vmsc->props.num_mbwu_mon) + continue; + + msc = vmsc->msc; + mpam_mon_sel_outer_lock(msc); + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + if (!ris->props.num_mbwu_mon) + continue; + + mbwu_state = kcalloc(ris->props.num_mbwu_mon, + sizeof(*ris->mbwu_state), + GFP_KERNEL); + if (!mbwu_state) { + __destroy_component_cfg(comp); + err = -ENOMEM; + break; + } + + init_garbage(&mbwu_state[0].garbage); + + if (mpam_mon_sel_inner_lock(msc)) { + ris->mbwu_state = mbwu_state; + mpam_mon_sel_inner_unlock(msc); + } + } + mpam_mon_sel_outer_unlock(msc); + + if (err) + return err; + } + + return 0; +} + +static int mpam_allocate_config(void) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + list_for_each_entry(comp, &class->components, class_list) { + int err = __allocate_component_cfg(comp); + if (err) + return err; + } + } + + return 0; +} + +static void mpam_debugfs_setup_ris(struct mpam_msc_ris *ris) +{ + char name[40]; + struct dentry *d; + struct mpam_props *rprops = &ris->props; + + snprintf(name, sizeof(name), "ris.%u", ris->ris_idx); + d = debugfs_create_dir(name, ris->vmsc->msc->debugfs); + debugfs_create_x64("mpamf_idr", 0400, d, &ris->idr); + debugfs_create_x32("mpamf_cpor_idr", 0400, d, &ris->cpor_idr); + debugfs_create_x32("mpamf_ccap_idr", 0400, d, &ris->ccap_idr); + debugfs_create_ulong("features", 0400, d, &rprops->features[0]); + debugfs_create_x16("cpbm_wd", 0400, d, &rprops->cpbm_wd); + debugfs_create_x16("mbw_pbm_bits", 0400, d, &rprops->mbw_pbm_bits); + debugfs_create_x16("num_csu_mon", 0400, d, &rprops->num_csu_mon); + debugfs_create_x16("num_mbwu_mon", 0400, d, &rprops->num_mbwu_mon); + debugfs_create_cpumask("affinity", 0400, d, &ris->affinity); + ris->debugfs = d; +} + +static void 
mpam_debugfs_setup_vmsc(struct mpam_component *comp, + struct mpam_vmsc *vmsc) +{ + u8 ris_idx; + char name[40]; + char path[40]; + struct dentry *d; + struct mpam_msc_ris *ris; + int msc_id = vmsc->msc->id; + + snprintf(name, sizeof(name), "vmsc.%u", msc_id); + d = debugfs_create_dir(name, comp->debugfs); + debugfs_create_ulong("features", 0400, d, &vmsc->props.features[0]); + vmsc->debugfs = d; + + list_for_each_entry_rcu(ris, &vmsc->ris, vmsc_list) { + ris_idx = ris->ris_idx; + + snprintf(name, sizeof(name), "msc.%u_ris.%u", msc_id, + ris_idx); + snprintf(path, sizeof(path), "../../../msc.%u/ris.%u", + msc_id, ris_idx); + debugfs_create_symlink(name, d, path); + } +} + +static void mpam_debugfs_setup_comp(struct mpam_class *class, + struct mpam_component *comp) +{ + char name[40]; + struct dentry *d; + struct mpam_vmsc *vmsc; + + snprintf(name, sizeof(name), "comp.%u", comp->comp_id); + d = debugfs_create_dir(name, class->debugfs); + comp->debugfs = d; + + list_for_each_entry_rcu(vmsc, &comp->vmsc, comp_list) + mpam_debugfs_setup_vmsc(comp, vmsc); +} + +static void mpam_debugfs_setup(void) +{ + char name[40]; + struct dentry *d; + struct mpam_msc *msc; + struct mpam_class *class; + struct mpam_msc_ris *ris; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(msc, &mpam_all_msc, all_msc_list) { + d = msc->debugfs; + debugfs_create_u32("fw_id", 0400, d, &msc->pdev->id); + debugfs_create_x32("iface", 0400, d, &msc->iface); + debugfs_create_x32("mpamf_iidr", 0400, d, &msc->iidr); + debugfs_create_x64("nrdy_retry_count", 0400, d, &msc->nrdy_retry_count); + list_for_each_entry(ris, &msc->ris, msc_list) + mpam_debugfs_setup_ris(ris); + } + + list_for_each_entry_rcu(class, &mpam_classes, classes_list) { + snprintf(name, sizeof(name), "class.%u", class->level); + d = debugfs_create_dir(name, mpam_debugfs); + debugfs_create_ulong("features", 0400, d, &class->props.features[0]); + debugfs_create_x32("nrdy_usec", 0400, d, &class->nrdy_usec); + debugfs_create_x16("quirks", 0400, d, &class->quirks); + debugfs_create_x8("level", 0400, d, &class->level); + debugfs_create_cpumask("affinity", 0400, d, &class->affinity); + class->debugfs = d; + + list_for_each_entry_rcu(comp, &class->components, class_list) + mpam_debugfs_setup_comp(class, comp); + } +} + +static int mpam_force_disable_show(struct seq_file *s, void *data) +{ + seq_puts(s, "Write 1 to this file to trigger an MPAM error.\n"); + return 0; +} + +static ssize_t mpam_force_disable_write(struct file *file, + const char __user *userbuf, size_t count, + loff_t *ppos) +{ + u32 user_val; + int err; + + err = kstrtou32_from_user(userbuf, count, 10, &user_val); + if (err) + return err; + + if (user_val == 1) { + mpam_disable_reason = "debugfs trigger"; + mpam_disable(NULL); + } + + return count; +} + +DEFINE_SHOW_STORE_ATTRIBUTE(mpam_force_disable); + +static void mpam_enable_once(void) +{ + int err; + + /* + * Once the cpuhp callbacks have been changed, mpam_partid_max can no + * longer change. + */ + spin_lock(&partid_max_lock); + partid_max_published = true; + spin_unlock(&partid_max_lock); + + /* + * If all the MSC have been probed, enabling the IRQs happens next. 
+ * That involves cross-calling to a CPU that can reach the MSC, and + * the locks must be taken in this order: + */ + cpus_read_lock(); + mutex_lock(&mpam_list_lock); + do { + mpam_enable_merge_features(&mpam_classes); + + err = mpam_register_irqs(); + if (err) { + pr_warn("Failed to register irqs: %d\n", err); + break; + } + + err = mpam_allocate_config(); + if (err) { + pr_err("Failed to allocate configuration arrays.\n"); + break; + } + + mpam_debugfs_setup(); + } while (0); + mutex_unlock(&mpam_list_lock); + cpus_read_unlock(); + + debugfs_create_file("force_disable", 0600, mpam_debugfs, NULL, + &mpam_force_disable_fops); + + if (!err) { + err = mpam_resctrl_setup(); + if (err) + pr_err("Failed to initialise resctrl: %d\n", err); + } + + if (err) { + mpam_disable_reason = "Failed to enable."; + schedule_work(&mpam_broken_work); + return; + } + + static_branch_enable(&mpam_enabled); + mpam_resctrl_enabled = true; + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, + "mpam:online"); + + /* Use printk() to avoid the pr_fmt adding the function name. */ + printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n", + mpam_partid_max + 1, mpam_pmg_max + 1); +} + +void mpam_reset_component_locked(struct mpam_component *comp) +{ + + struct mpam_vmsc *vmsc; + + lockdep_assert_cpus_held(); + mpam_assert_partid_sizes_fixed(); + + mpam_reset_component_cfg(comp); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!ris->in_reset_state) + mpam_touch_msc(msc, mpam_reset_ris, ris); + ris->in_reset_state = true; + } + } +} + +void mpam_reset_class_locked(struct mpam_class *class) +{ + struct mpam_component *comp; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_component_locked(comp); +} + +void mpam_reset_class(struct mpam_class *class) +{ + cpus_read_lock(); + mpam_reset_class_locked(class); + cpus_read_unlock(); +} + +/* + * Called in response to an error IRQ. + * All of MPAMs errors indicate a software bug, restore any modified + * controls to their reset values. + */ +void mpam_disable(struct work_struct *ignored) +{ + int idx; + bool do_resctrl_exit; + struct mpam_class *class; + struct mpam_msc *msc, *tmp; + + if (mpam_is_enabled()) + static_branch_disable(&mpam_enabled); + + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + + /* mpam_cpu_offline() tells resctrl all the CPUs are offline. */ + do_resctrl_exit = mpam_resctrl_enabled; + mpam_resctrl_enabled = false; + mutex_unlock(&mpam_cpuhp_state_lock); + + if (do_resctrl_exit) + mpam_resctrl_exit(); + + mpam_unregister_irqs(); + + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_class(class); + srcu_read_unlock(&mpam_srcu, idx); + + mutex_lock(&mpam_list_lock); + list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list) + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + mpam_free_garbage(); + + pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason); +} + +/* + * Enable mpam once all devices have been probed. 
+ * Scheduled by mpam_discovery_cpu_online() once all devices have been created. + * Also scheduled when new devices are probed when new CPUs come online. + */ +void mpam_enable(struct work_struct *work) +{ + static atomic_t once; + struct mpam_msc *msc; + bool all_devices_probed = true; + + /* Have we probed all the hw devices? */ + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + mutex_lock(&msc->probe_lock); + if (!msc->probed) + all_devices_probed = false; + mutex_unlock(&msc->probe_lock); + + if (!all_devices_probed) + break; + } + + if (all_devices_probed && !atomic_fetch_inc(&once)) + mpam_enable_once(); +} + +struct mpam_write_config_arg { + struct mpam_msc_ris *ris; + struct mpam_component *comp; + u16 partid; +}; + +static int __write_config(void *arg) +{ + struct mpam_write_config_arg *c = arg; + + mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]); + + return 0; +} + +#define maybe_update_config(cfg, feature, newcfg, member, changes) do { \ + if (mpam_has_feature(feature, newcfg) && \ + (newcfg)->member != (cfg)->member) { \ + (cfg)->member = (newcfg)->member; \ + mpam_set_feature(feature, cfg); \ + \ + (changes) = true; \ + } \ +} while (0) + +static bool mpam_update_config(struct mpam_config *cfg, + const struct mpam_config *newcfg) +{ + bool has_changes = false; + + maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes); + maybe_update_config(cfg, mpam_feat_cmax_cmax, newcfg, cmax, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_min, newcfg, mbw_min, has_changes); + + return has_changes; +} + +static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg) +{ + struct mpam_props *cprops = &class->props; + u16 min, min_hw_granule, delta; + u16 max_hw_value, res0_bits; + + /* + * Calculate the values the 'min' control can hold. + * e.g. on a platform with bwa_wd = 8, min_hw_granule is 0x00ff because + * those bits are RES0. Configurations of this value are effectively + * zero. But configurations need to saturate at min_hw_granule on + * systems with mismatched bwa_wd, where the 'less than 0' values are + * implemented on some MSC, but not others. + */ + res0_bits = 16 - cprops->bwa_wd; + max_hw_value = ((1 << cprops->bwa_wd) - 1) << res0_bits; + min_hw_granule = ~max_hw_value; + + /* + * MAX and MIN should be set together. If only one is provided, + * generate a configuration for the other. If only one control + * type is supported, the other value will be ignored. + * + * Resctrl can only configure the MAX. 
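+ * When only the MAX is supplied, the generated MIN is placed roughly 5% of
+ * the full scale below it (clamped at zero), then raised to min_hw_granule
+ * if the result is below what the hardware can represent.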
+ */
+        if (mpam_has_feature(mpam_feat_mbw_max, cfg) &&
+            !mpam_has_feature(mpam_feat_mbw_min, cfg)) {
+                delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1;
+                if (cfg->mbw_max > delta)
+                        min = cfg->mbw_max - delta;
+                else
+                        min = 0;
+
+                cfg->mbw_min = max(min, min_hw_granule);
+                mpam_set_feature(mpam_feat_mbw_min, cfg);
+        }
+
+        if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class) &&
+            cfg->mbw_min <= min_hw_granule) {
+                cfg->mbw_min = min_hw_granule + 1;
+                mpam_set_feature(mpam_feat_mbw_min, cfg);
+        }
+}
+
+int mpam_apply_config(struct mpam_component *comp, u16 partid,
+                      struct mpam_config *user_cfg)
+{
+        struct mpam_write_config_arg arg;
+        struct mpam_msc_ris *ris;
+        struct mpam_config cfg;
+        struct mpam_vmsc *vmsc;
+        struct mpam_msc *msc;
+
+        lockdep_assert_cpus_held();
+
+        /* Don't pass in the current config! */
+        WARN_ON_ONCE(&comp->cfg[partid] == user_cfg);
+
+        /*
+         * Copy the config to avoid writing back the 'extended' version to
+         * the caller.
+         * This avoids mpam_devices.c setting an mbw_min that mpam_resctrl.c
+         * is unaware of ... when it then changes mbw_max to be lower than
+         * mbw_min.
+         */
+        cfg = *user_cfg;
+
+        mpam_extend_config(comp->class, &cfg);
+
+        if (!mpam_update_config(&comp->cfg[partid], &cfg))
+                return 0;
+
+        arg.comp = comp;
+        arg.partid = partid;
+
+        guard(srcu)(&mpam_srcu);
+        list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list,
+                                 srcu_read_lock_held(&mpam_srcu)) {
+                msc = vmsc->msc;
+
+                list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list,
+                                         srcu_read_lock_held(&mpam_srcu)) {
+                        arg.ris = ris;
+                        mpam_touch_msc(msc, __write_config, &arg);
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * MSCs that are declared by the firmware as being part of a cache may not
+ * be created automatically as platform devices, since there is no
+ * dedicated cache driver.
+ *
+ * Deal with these MSCs here.
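+ * Any 'cache' node that has not already been populated, and for which an
+ * arm,mpam-msc match is found, gets a platform device created for it below.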
+ */ +static void mpam_dt_create_foundling_msc(void) +{ + struct platform_device *pdev; + struct device_node *cache; + + for_each_compatible_node(cache, NULL, "cache") { + struct device_node *cache_device; + + if (of_node_check_flag(cache, OF_POPULATED)) + continue; + + cache_device = of_find_matching_node_and_match(cache, mpam_of_match, NULL); + if (!cache_device) + continue; + of_node_put(cache_device); + + pdev = of_platform_device_create(cache, "cache", NULL); + if (!pdev) + pr_err_once("Failed to create MSC devices under caches\n"); + } +} + +static int __init mpam_msc_driver_init(void) +{ + if (!system_supports_mpam()) + return -EOPNOTSUPP; + + init_srcu_struct(&mpam_srcu); + + if (!acpi_disabled) + fw_num_msc = acpi_mpam_count_msc(); + else + fw_num_msc = mpam_dt_count_msc(); + + if (fw_num_msc <= 0) { + pr_err("No MSC devices found in firmware\n"); + return -EINVAL; + } + + if (acpi_disabled) + mpam_dt_create_foundling_msc(); + + mpam_debugfs = debugfs_create_dir("mpam", NULL); + + return platform_driver_register(&mpam_msc_driver); +} +/* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ +subsys_initcall(mpam_msc_driver_init); + +static int mpam_cmdline_partid_max_set(const char *arg, + const struct kernel_param *kp) +{ + int ret; + + spin_lock(&partid_max_lock); + ret = kstrtou16(arg, 10, &mpam_cmdline_partid_max); + if (!ret) + mpam_cmdline_partid_max_overridden = true; + spin_unlock(&partid_max_lock); + + return 0; +} +static int mpam_cmdline_partid_max_get(char *buffer, + const struct kernel_param *kp) +{ + u16 val = 0xffff; + + spin_lock(&partid_max_lock); + if (mpam_cmdline_partid_max_overridden) + val = mpam_cmdline_partid_max; + spin_unlock(&partid_max_lock); + + return sprintf(buffer, "%u\n", val); +} +static const struct kernel_param_ops mpam_cmdline_partid_max_ops = { + .set = mpam_cmdline_partid_max_set, + .get = mpam_cmdline_partid_max_get, +}; +module_param_cb(partid_max, &mpam_cmdline_partid_max_ops, NULL, 0644); +MODULE_PARM_DESC(partid_max, "Override for reducing the number of PARTID."); + +static DEFINE_XARRAY(mpam_force_unkown_msc); + +static void mpam_force_unknown_msc_add(u32 msc_id, gfp_t gfp) +{ + xa_store(&mpam_force_unkown_msc, msc_id, xa_mk_value(msc_id), gfp); +} + +bool mpam_force_unknown_msc_test(struct mpam_msc *msc) +{ + return !!xa_load(&mpam_force_unkown_msc, msc->pdev->id); +} + +static int mpam_force_unknown_msc_set(const char *_str, + const struct kernel_param *kp) +{ + int err; + u32 val; + char *tok, *iter; + char *str __free(kfree) = kstrdup(_str, GFP_KERNEL); + + iter = str; + do { + tok = strsep(&iter, ","); + err = kstrtou32(tok, 10, &val); + if (err) { + pr_err("Failed to parse commandline: %d\n", err); + break; + } + mpam_force_unknown_msc_add(val, GFP_KERNEL); + } while (iter); + + return 0; +} +static int mpam_force_unknown_msc_get(char *buffer, + const struct kernel_param *kp) +{ + unsigned long index, count = 0; + int result = 0; + void *entry; + + xa_for_each(&mpam_force_unkown_msc, index, entry) { + if (count) + result += sprintf(buffer + result, ","); + + result += sprintf(buffer + result, "%lu", index); + count += 1; + } + + result += sprintf(buffer + result, "\n"); + + return result; +} +static const struct kernel_param_ops mpam_force_unknown_msc_ops = { + .set = mpam_force_unknown_msc_set, + .get = mpam_force_unknown_msc_get, +}; +subsys_param_cb(force_unknown_msc, &mpam_force_unknown_msc_ops, NULL, 0644); +MODULE_PARM_DESC(force_unknown_msc, "Disabling a set of probed MSC."); + +#ifdef CONFIG_MPAM_KUNIT_TEST 
+#include "test_mpam_devices.c" +#endif diff --git a/drivers/resctrl/mpam_fb.c b/drivers/resctrl/mpam_fb.c new file mode 100644 index 0000000000000..af87a9e934cd0 --- /dev/null +++ b/drivers/resctrl/mpam_fb.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Arm Ltd. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mpam_fb.h" + +#define MPAM_MSC_ATTRIBUTES 0x3 +#define MPAM_MSC_READ 0x4 +#define MPAM_MSC_WRITE 0x5 + +static const struct scmi_mpam_proto_ops *mpam_scmi_ops; + +static DEFINE_MUTEX(scmi_agent_list_mutex); +static LIST_HEAD(smci_agent_list); + +struct scmi_mpam_agent { + struct list_head list; + struct device_node *of_node; + struct scmi_protocol_handle *ph_handle; +}; + +#define SCMI_BUF_LENGTH_IDX 4 +#define SCMI_BUF_HEADER_IDX 5 +#define SCMI_BUF_PAYLOAD_IDX 6 +#define SCMI_READ_MSG_SIZE 9 +#define SCMI_WRITE_MSG_SIZE 10 + +static int mpam_fb_build_read_message(int msc_id, int reg, u32 *msg_buf) +{ + memset(msg_buf, 0, SCMI_READ_MSG_SIZE * sizeof(u32)); + + msg_buf[SCMI_BUF_LENGTH_IDX] = SCMI_READ_MSG_SIZE * sizeof(u32); + msg_buf[SCMI_BUF_HEADER_IDX] = MPAM_MSC_READ | (0x1a << 10); + msg_buf[SCMI_BUF_PAYLOAD_IDX + 0] = msc_id; + msg_buf[SCMI_BUF_PAYLOAD_IDX + 2] = reg; + + return SCMI_READ_MSG_SIZE * sizeof(u32); +} + +static int mpam_fb_build_write_message(int msc_id, int reg, u32 val, + u32 *msg_buf) +{ + memset(msg_buf, 0, SCMI_WRITE_MSG_SIZE * sizeof(u32)); + + msg_buf[SCMI_BUF_LENGTH_IDX] = SCMI_WRITE_MSG_SIZE * sizeof(u32); + msg_buf[SCMI_BUF_HEADER_IDX] = MPAM_MSC_WRITE | (0x1a << 10); + msg_buf[SCMI_BUF_PAYLOAD_IDX + 0] = msc_id; + msg_buf[SCMI_BUF_PAYLOAD_IDX + 2] = reg; + msg_buf[SCMI_BUF_PAYLOAD_IDX + 3] = val; + + return SCMI_WRITE_MSG_SIZE * sizeof(u32); +} + +static struct scmi_protocol_handle *scmi_agent_get_ph(const struct device_node *np) +{ + struct scmi_mpam_agent *agent; + struct scmi_protocol_handle *ph = NULL; + + mutex_lock(&scmi_agent_list_mutex); + + list_for_each_entry(agent, &smci_agent_list, list) { + if (np == agent->of_node) { + ph = agent->ph_handle; + break; + } + } + + mutex_unlock(&scmi_agent_list_mutex); + + return ph; +} + +int mpam_fb_connect_channel(const struct device_node *of_node, + struct mpam_fb_channel *chan) +{ + int msc_id = 0; + + chan->ph_handle = scmi_agent_get_ph(of_node); + if (!chan->ph_handle) + return -EPROBE_DEFER; + + chan->use_scmi = true; + + return msc_id; +} + +int mpam_fb_send_read_request(struct mpam_fb_channel *chan, int msc_id, + u16 reg, u32 *result) +{ + u32 msg_buf[12]; + int msg_len; + + msg_len = mpam_fb_build_read_message(msc_id, reg, msg_buf); + + if (chan->use_scmi) { + /* The SCMI layer adds the shared memory header itself. */ + msg_len -= SCMI_BUF_PAYLOAD_IDX * sizeof(u32); + + mpam_scmi_ops->mpam_transfer_buf(chan->ph_handle, + MPAM_MSC_READ, + msg_buf + SCMI_BUF_PAYLOAD_IDX, + msg_len, result); + + return 0; + } + + if (msg_len < chan->pcc_shmem_size) + return -EINVAL; + + memcpy(chan->pcc_shmem, msg_buf, msg_len); + mbox_send_message(chan->pcc_mbox, NULL); + + return 0; +} + +int mpam_fb_send_write_request(struct mpam_fb_channel *chan, int msc_id, + u16 reg, u32 value) +{ + u32 msg_buf[12]; + int msg_len; + + msg_len = mpam_fb_build_write_message(msc_id, reg, value, msg_buf); + if (msg_len < 0) + return msg_len; + + if (chan->use_scmi) { + /* The SCMI layer adds the shared memory header itself. 
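+ * Only the payload words after SCMI_BUF_PAYLOAD_IDX are handed to
+ * mpam_transfer_buf(), so the header and length words are trimmed from
+ * msg_len.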
*/ + msg_len -= SCMI_BUF_PAYLOAD_IDX * sizeof(u32); + + mpam_scmi_ops->mpam_transfer_buf(chan->ph_handle, + MPAM_MSC_WRITE, + msg_buf + SCMI_BUF_PAYLOAD_IDX, + msg_len, NULL); + + return 0; + } + + if (msg_len < chan->pcc_shmem_size) + return -EINVAL; + + memcpy(chan->pcc_shmem, msg_buf, msg_len); + mbox_send_message(chan->pcc_mbox, NULL); + + return 0; +} + +static int scmi_mpam_probe(struct scmi_device *sdev) +{ + const struct scmi_handle *handle = sdev->handle; + struct scmi_protocol_handle *ph; + struct scmi_mpam_agent *agent; + + if (!handle) + return -ENODEV; + + mpam_scmi_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_MPAM, &ph); + if (IS_ERR(mpam_scmi_ops)) + return PTR_ERR(mpam_scmi_ops); + + agent = devm_kzalloc(&sdev->dev, sizeof(*agent), GFP_KERNEL); + if (!agent) + return -ENOMEM; + + agent->of_node = sdev->dev.of_node; + agent->ph_handle = ph; + + mutex_lock(&scmi_agent_list_mutex); + list_add(&agent->list, &smci_agent_list); + mutex_unlock(&scmi_agent_list_mutex); + + return 0; +} + +static void scmi_mpam_remove(struct scmi_device *sdev) +{ +} + +static const struct scmi_device_id scmi_id_table[] = { + { SCMI_PROTOCOL_MPAM, "mpam" }, + {}, +}; +MODULE_DEVICE_TABLE(scmi, scmi_id_table); + +static struct scmi_driver scmi_mpam_driver = { + .name = "scmi-mpam-driver", + .probe = scmi_mpam_probe, + .remove = scmi_mpam_remove, + .id_table = scmi_id_table, +}; +module_scmi_driver(scmi_mpam_driver); diff --git a/drivers/resctrl/mpam_fb.h b/drivers/resctrl/mpam_fb.h new file mode 100644 index 0000000000000..723e9c5a5e1e3 --- /dev/null +++ b/drivers/resctrl/mpam_fb.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2024 Arm Ltd. + +#ifndef MPAM_FB_H_ +#define MPAM_FB_H_ + +#include +#include +#include + +struct mpam_fb_channel { + bool use_scmi; + struct scmi_protocol_handle *ph_handle; + void __iomem *pcc_shmem; + size_t pcc_shmem_size; + struct mbox_chan *pcc_mbox; +}; + +int mpam_fb_connect_channel(const struct device_node *of_node, + struct mpam_fb_channel *chan); +int mpam_fb_send_read_request(struct mpam_fb_channel *chan, int msc_id, + u16 reg, u32 *result); +int mpam_fb_send_write_request(struct mpam_fb_channel *chan, int msc_id, + u16 reg, u32 value); + +#endif diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h new file mode 100644 index 0000000000000..c2cb5129e3e21 --- /dev/null +++ b/drivers/resctrl/mpam_internal.h @@ -0,0 +1,841 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2025 Arm Ltd. + +#ifndef MPAM_INTERNAL_H +#define MPAM_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mpam_fb.h" + +#define MPAM_MSC_MAX_NUM_RIS 16 + + +struct platform_device; + +DECLARE_STATIC_KEY_FALSE(mpam_enabled); + +#ifdef CONFIG_MPAM_KUNIT_TEST +#define PACKED_FOR_KUNIT __packed +#else +#define PACKED_FOR_KUNIT +#endif + +/* + * This 'mon' values must not alias an actual monitor, so must be larger than + * U16_MAX, but not be confused with an errno value, so smaller than + * (u32)-SZ_4K. + * USE_PRE_ALLOCATED is used to avoid confusion with an actual monitor. + */ +#define USE_PRE_ALLOCATED (U16_MAX + 1) + +/* + * Only these event configuration bits are supported. MPAM can't know if + * data is being written back, these will show up as a write. 
+ */ +#define MPAM_RESTRL_EVT_CONFIG_VALID (READS_TO_LOCAL_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM) + +static inline bool mpam_is_enabled(void) +{ + return static_branch_likely(&mpam_enabled); +} + +/* + * Structures protected by SRCU may not be freed for a surprising amount of + * time (especially if perf is running). To ensure the MPAM error interrupt can + * tear down all the structures, build a list of objects that can be gargbage + * collected once synchronize_srcu() has returned. + * If pdev is non-NULL, use devm_kfree(). + */ +struct mpam_garbage { + /* member of mpam_garbage */ + struct llist_node llist; + + void *to_free; + struct platform_device *pdev; +}; + +struct mpam_msc { + /* member of mpam_all_msc */ + struct list_head all_msc_list; + + int id; + struct platform_device *pdev; + + /* Not modified after mpam_is_enabled() becomes true */ + enum mpam_msc_iface iface; + u32 pcc_subspace_id; + struct pcc_mbox_chan *pcc_chan; + struct mpam_fb_channel mpam_fb_chan; + int mpam_fb_msc_id; /* in its own name space */ + u32 nrdy_usec; + u64 nrdy_retry_count; + cpumask_t accessibility; + bool has_extd_esr; + + int reenable_error_ppi; + struct mpam_msc * __percpu *error_dev_id; + + atomic_t online_refs; + + /* + * probe_lock is only taken during discovery. After discovery these + * properties become read-only and the lists are protected by SRCU. + */ + struct mutex probe_lock; + bool probed; + u16 partid_max; + u8 pmg_max; + unsigned long ris_idxs; + u32 ris_max; + u32 iidr; + u16 quirks; + + /* + * error_irq_lock is taken when registering/unregistering the error + * interrupt and maniupulating the below flags. + */ + struct mutex error_irq_lock; + bool error_irq_req; + bool error_irq_hw_enabled; + + /* mpam_msc_ris of this component */ + struct list_head ris; + + /* + * part_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary + * by RIS). + * If needed, take msc->probe_lock first. + */ + struct mutex part_sel_lock; + + /* + * mon_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_MON_SEL, and the mbwu_state. + * Both the 'inner' and 'outer' must be taken. + * For real MMIO MSC, the outer lock is unnecessary - but keeps the + * code common with: + * Firmware backed MSC need to sleep when accessing the MSC, which + * means some code-paths will always fail. For these MSC the outer + * lock is providing the protection, and the inner lock fails to + * be taken if the task is unable to sleep. + * + * If needed, take msc->probe_lock first. + */ + struct mutex outer_mon_sel_lock; + bool outer_lock_held; + raw_spinlock_t inner_mon_sel_lock; + unsigned long inner_mon_sel_flags; + + void __iomem *mapped_hwpage; + size_t mapped_hwpage_sz; + + struct dentry *debugfs; + + /* Values only used on some platforms for quirks */ + u32 t241_id; + + struct mpam_garbage garbage; +}; + +static inline bool __must_check mpam_mon_sel_inner_lock(struct mpam_msc *msc) +{ + /* + * The outer lock may be taken by a CPU that then issues an IPI to run + * a helper that takes the inner lock. lockdep can't help us here. 
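+ * Instead, the outer_lock_held flag is checked by hand.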
+ */ + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + + if (msc->iface == MPAM_IFACE_MMIO) { + raw_spin_lock_irqsave(&msc->inner_mon_sel_lock, msc->inner_mon_sel_flags); + return true; + } + + /* Accesses must fail if we are not pre-emptible */ + return !!preemptible(); +} + +static inline void mpam_mon_sel_inner_unlock(struct mpam_msc *msc) +{ + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + + if (msc->iface == MPAM_IFACE_MMIO) + raw_spin_unlock_irqrestore(&msc->inner_mon_sel_lock, msc->inner_mon_sel_flags); +} + +static inline void mpam_mon_sel_outer_lock(struct mpam_msc *msc) +{ + mutex_lock(&msc->outer_mon_sel_lock); + msc->outer_lock_held = true; +} + +static inline void mpam_mon_sel_outer_unlock(struct mpam_msc *msc) +{ + msc->outer_lock_held = false; + mutex_unlock(&msc->outer_mon_sel_lock); +} + +static inline void mpam_mon_sel_lock_held(struct mpam_msc *msc) +{ + WARN_ON_ONCE(!READ_ONCE(msc->outer_lock_held)); + if (msc->iface == MPAM_IFACE_MMIO) + lockdep_assert_held_once(&msc->inner_mon_sel_lock); + else + lockdep_assert_preemption_enabled(); +} + +static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) +{ + raw_spin_lock_init(&msc->inner_mon_sel_lock); + mutex_init(&msc->outer_mon_sel_lock); +} + +/* Bits for mpam features bitmaps */ +enum mpam_device_features { + mpam_feat_cmax_softlim, + mpam_feat_cmax_cmax, + mpam_feat_cmax_cmin, + mpam_feat_cmax_cassoc, + mpam_feat_cpor_part, + mpam_feat_mbw_part, + mpam_feat_mbw_min, + mpam_feat_mbw_max, + mpam_feat_mbw_prop, + mpam_feat_intpri_part, + mpam_feat_intpri_part_0_low, + mpam_feat_dspri_part, + mpam_feat_dspri_part_0_low, + mpam_feat_msmon, + mpam_feat_msmon_csu, + mpam_feat_msmon_csu_capture, + mpam_feat_msmon_csu_xcl, + mpam_feat_msmon_csu_hw_nrdy, + mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_31counter, + mpam_feat_msmon_mbwu_44counter, + mpam_feat_msmon_mbwu_63counter, + mpam_feat_msmon_mbwu_capture, + mpam_feat_msmon_mbwu_rwbw, + mpam_feat_msmon_mbwu_hw_nrdy, + mpam_feat_partid_nrw, + MPAM_FEATURE_LAST, +}; + +struct mpam_props { + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u16 cpbm_wd; + u16 mbw_pbm_bits; + u16 bwa_wd; + u16 cmax_wd; + u16 cassoc_wd; + u16 intpri_wd; + u16 dspri_wd; + u16 num_csu_mon; + u16 num_mbwu_mon; + +/* + * Kunit tests use memset() to set up feature combinations that should be + * removed, and will false-positive if the compiler introduces padding that + * isn't cleared during sanitisation. 
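+ * PACKED_FOR_KUNIT expands to __packed only when CONFIG_MPAM_KUNIT_TEST is
+ * set, and to nothing otherwise.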
+ */ +} PACKED_FOR_KUNIT __aligned(__alignof__(unsigned long)); + +#define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) +#define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) +#define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) + +/* Workaround bits for msc->quirks */ +enum mpam_device_quirks { + T241_SCRUB_SHADOW_REGS, + T241_FORCE_MBW_MIN_TO_ONE, + T241_MBW_COUNTER_SCALE_64, + IGNORE_CSU_NRDY, + MPAM_QUIRK_LAST, +}; + +#define mpam_has_quirk(_quirk, x) ((1 << (_quirk) & (x)->quirks)) +#define mpam_set_quirk(_quirk, x) ((x)->quirks |= (1 << (_quirk))) + +struct mpam_quirk { + void (*init)(struct mpam_msc *msc, const struct mpam_quirk *quirk); + + u32 iidr; + u32 iidr_mask; + + enum mpam_device_quirks workaround; +}; + +#define IIDR_PROD(x) ((x) << MPAMF_IIDR_PRODUCTID_SHIFT) +#define IIDR_VAR(x) ((x) << MPAMF_IIDR_VARIANT_SHIFT) +#define IIDR_REV(x) ((x) << MPAMF_IIDR_REVISION_SHIFT) +#define IIDR_IMP(x) ((x) << MPAMF_IIDR_IMPLEMENTER_SHIFT) + +#define IIDR_MATCH_ONE (IIDR_PROD(0xfff) | IIDR_VAR(0xf) | IIDR_REV(0xf) | IIDR_IMP(0xfff)) + +/* The values for MSMON_CFG_MBWU_FLT.RWBW */ +enum mon_filter_options { + COUNT_BOTH = 0, + COUNT_WRITE = 1, + COUNT_READ = 2, +}; + +struct mon_cfg { + /* + * mon must be large enough to hold out of range values like + * USE_RMID_IDX + */ + u32 mon; + u8 pmg; + bool match_pmg; + bool csu_exclude_clean; + u32 partid; + enum mon_filter_options opts; +}; + +/* + * Changes to enabled and cfg are protected by the msc->lock. + * The msc's mon_sel_lock protects: + * - reset_on_next_read + * - prev_val + * - correction + */ +struct msmon_mbwu_state { + bool enabled; + bool reset_on_next_read; + struct mon_cfg cfg; + + /* The value last read from the hardware. Used to detect overflow. */ + u64 prev_val; + + /* + * The value to add to the new reading to account for power management, + * and shifts to trigger the overflow interrupt. + */ + u64 correction; + + struct mpam_garbage garbage; +}; + +struct mpam_class { + /* mpam_components in this class */ + struct list_head components; + + cpumask_t affinity; + + struct mpam_props props; + u32 nrdy_usec; + u16 quirks; + u8 level; + enum mpam_class_types type; + + /* member of mpam_classes */ + struct list_head classes_list; + + struct ida ida_csu_mon; + struct ida ida_mbwu_mon; + + struct dentry *debugfs; + struct mpam_garbage garbage; +}; + +struct mpam_config { + /* Which configuration values are valid. */ + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u32 cpbm; + u32 mbw_pbm; + u16 mbw_max; + u16 mbw_min; + u16 cmax; + u16 cmin; + + bool cmax_softlim; + + bool reset_cpbm; + bool reset_mbw_pbm; + + struct mpam_garbage garbage; +}; + +struct mpam_component { + u32 comp_id; + + /* mpam_vmsc in this component */ + struct list_head vmsc; + + cpumask_t affinity; + + /* + * Array of configuration values, indexed by partid. + * Read from cpuhp callbacks, hold the cpuhp lock when writing. 
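 *
 * [Illustrative aside, not part of this patch] A sketch of the intended
 * access pattern, assuming the writer path goes through mpam_apply_config()
 * (declared later in this header) and that "the cpuhp lock" means
 * cpus_read_lock():
 *
 *	// writer: schemata update, cpu hotplug lock held by the caller
 *	cpus_read_lock();
 *	mpam_apply_config(comp, partid, &new_cfg);
 *	cpus_read_unlock();
 *
 *	// reader: cpuhp online callback, hotplug lock already held
 *	cfg = &comp->cfg[partid];
 *
 * The array is indexed by PARTID, so it presumably has mpam_partid_max + 1
 * entries.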
+ */ + struct mpam_config *cfg; + + /* member of mpam_class:components */ + struct list_head class_list; + + /* parent: */ + struct mpam_class *class; + + struct dentry *debugfs; + struct mpam_garbage garbage; +}; + +struct mpam_vmsc { + /* member of mpam_component:vmsc_list */ + struct list_head comp_list; + + /* mpam_msc_ris in this vmsc */ + struct list_head ris; + + struct mpam_props props; + + /* All RIS in this vMSC are members of this MSC */ + struct mpam_msc *msc; + + /* parent: */ + struct mpam_component *comp; + + struct dentry *debugfs; + struct mpam_garbage garbage; +}; + +struct mpam_msc_ris { + u8 ris_idx; + u64 idr; + u32 cpor_idr; + u32 ccap_idr; + struct mpam_props props; + bool in_reset_state; + + cpumask_t affinity; + + /* member of mpam_vmsc:ris */ + struct list_head vmsc_list; + + /* member of mpam_msc:ris */ + struct list_head msc_list; + + /* parent: */ + struct mpam_vmsc *vmsc; + + /* msmon mbwu configuration is preserved over reset */ + struct msmon_mbwu_state *mbwu_state; + + struct dentry *debugfs; + struct mpam_garbage garbage; +}; + +struct mpam_resctrl_dom { + struct mpam_component *ctrl_comp; + + /* + * There is no single mon_comp because different events may be backed + * by different class/components. mon_comp is indexed by the event + * number. + */ + struct mpam_component *mon_comp[QOS_NUM_EVENTS]; + + struct rdt_ctrl_domain resctrl_ctrl_dom; + struct rdt_mon_domain resctrl_mon_dom; + + u32 mbm_local_evt_cfg; +}; + +struct mpam_resctrl_res { + struct mpam_class *class; + struct rdt_resource resctrl_res; +}; + +struct mpam_resctrl_mon { + struct mpam_class *class; + + /* + * Array of allocated MBWU monitors, indexed by (closid, rmid). + * When ABMC is not in use, this array directly maps (closid, rmid) + * to the allocated monitor. Otherwise this array is sparse, and + * un-assigned (closid, rmid) are -1. + */ + int *mbwu_idx_to_mon; + + /* + * Array of assigned MBWU monitors, indexed by idx argument. + * When ABMC is not in use, this array can be NULL. Otherwise + * it maps idx to the allocated monitor. 
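 *
 * [Illustrative aside, not part of this patch] How the two arrays are meant
 * to combine, ignoring the CDP index shuffling. Assignment through the ABMC
 * interface copies a pre-allocated hardware monitor into the per-(closid,
 * rmid) slot, and reads always resolve through mbwu_idx_to_mon[]:
 *
 *	// assign: cntr_id comes from resctrl's counter allocator
 *	mon->mbwu_idx_to_mon[resctrl_arch_rmid_idx_encode(closid, rmid)] =
 *		mon->assigned_counters[cntr_id];
 *
 *	// unassign: reads for this (closid, rmid) now report "unassigned"
 *	mon->mbwu_idx_to_mon[resctrl_arch_rmid_idx_encode(closid, rmid)] = -1;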
+ */ + int *assigned_counters; +}; + +static inline int mpam_alloc_csu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_csu_mon, cprops->num_csu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_csu_mon(struct mpam_class *class, int csu_mon) +{ + ida_free(&class->ida_csu_mon, csu_mon); +} + +static inline int mpam_alloc_mbwu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_mbwu_mon, cprops->num_mbwu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_mbwu_mon(struct mpam_class *class, int mbwu_mon) +{ + ida_free(&class->ida_mbwu_mon, mbwu_mon); +} + +/* List of all classes - protected by srcu*/ +extern struct srcu_struct mpam_srcu; +extern struct list_head mpam_classes; + +/* System wide partid/pmg values */ +extern u16 mpam_partid_max; +extern u8 mpam_pmg_max; + +/* Scheduled work callback to enable mpam once all MSC have been probed */ +void mpam_enable(struct work_struct *work); +void mpam_disable(struct work_struct *work); + +/* Reset all the RIS in a class, optionally while holding cpus_read_lock() */ +void mpam_reset_class_locked(struct mpam_class *class); +void mpam_reset_class(struct mpam_class *class); + +/* Reset all the RIS in a component under cpus_read_lock() */ +void mpam_reset_component_locked(struct mpam_component *comp); + +int mpam_apply_config(struct mpam_component *comp, u16 partid, + struct mpam_config *cfg); + +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features, u64 *val); +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); +void mpam_msmon_reset_all_mbwu(struct mpam_component *comp); + +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity); + +bool mpam_force_unknown_msc_test(struct mpam_msc *msc); + +void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg); + +#ifdef CONFIG_RESCTRL_FS +int mpam_resctrl_setup(void); +void mpam_resctrl_exit(void); +int mpam_resctrl_online_cpu(unsigned int cpu); +int mpam_resctrl_offline_cpu(unsigned int cpu); +void mpam_resctrl_teardown_class(struct mpam_class *class); +#else +static inline int mpam_resctrl_setup(void) { return 0; } +static inline void mpam_resctrl_exit(void) { } +static inline int mpam_resctrl_online_cpu(unsigned int cpu) { return 0; } +static inline int mpam_resctrl_offline_cpu(unsigned int cpu) { return 0; } +static inline void mpam_resctrl_teardown_class(struct mpam_class *class) { } +#endif /* CONFIG_RESCTRL_FS */ + +/* + * MPAM MSCs have the following register layout. See: + * Arm Memory System Resource Partitioning and Monitoring (MPAM) System + * Component Specification. 
+ * https://developer.arm.com/documentation/ihi0099/latest/ + */ +#define MPAM_ARCHITECTURE_V1 0x10 + +/* Memory mapped control pages */ +/* ID Register offsets in the memory mapped page */ +#define MPAMF_IDR 0x0000 /* features id register */ +#define MPAMF_IIDR 0x0018 /* implementer id register */ +#define MPAMF_AIDR 0x0020 /* architectural id register */ +#define MPAMF_IMPL_IDR 0x0028 /* imp-def partitioning */ +#define MPAMF_CPOR_IDR 0x0030 /* cache-portion partitioning */ +#define MPAMF_CCAP_IDR 0x0038 /* cache-capacity partitioning */ +#define MPAMF_MBW_IDR 0x0040 /* mem-bw partitioning */ +#define MPAMF_PRI_IDR 0x0048 /* priority partitioning */ +#define MPAMF_MSMON_IDR 0x0080 /* performance monitoring features */ +#define MPAMF_CSUMON_IDR 0x0088 /* cache-usage monitor */ +#define MPAMF_MBWUMON_IDR 0x0090 /* mem-bw usage monitor */ +#define MPAMF_PARTID_NRW_IDR 0x0050 /* partid-narrowing */ + +/* Configuration and Status Register offsets in the memory mapped page */ +#define MPAMCFG_PART_SEL 0x0100 /* partid to configure */ +#define MPAMCFG_CPBM 0x1000 /* cache-portion config */ +#define MPAMCFG_CMAX 0x0108 /* cache-capacity config */ +#define MPAMCFG_CMIN 0x0110 /* cache-capacity config */ +#define MPAMCFG_CASSOC 0x0118 /* cache-associativity config */ +#define MPAMCFG_MBW_MIN 0x0200 /* min mem-bw config */ +#define MPAMCFG_MBW_MAX 0x0208 /* max mem-bw config */ +#define MPAMCFG_MBW_WINWD 0x0220 /* mem-bw accounting window config */ +#define MPAMCFG_MBW_PBM 0x2000 /* mem-bw portion bitmap config */ +#define MPAMCFG_PRI 0x0400 /* priority partitioning config */ +#define MPAMCFG_MBW_PROP 0x0500 /* mem-bw stride config */ +#define MPAMCFG_INTPARTID 0x0600 /* partid-narrowing config */ + +#define MSMON_CFG_MON_SEL 0x0800 /* monitor selector */ +#define MSMON_CFG_CSU_FLT 0x0810 /* cache-usage monitor filter */ +#define MSMON_CFG_CSU_CTL 0x0818 /* cache-usage monitor config */ +#define MSMON_CFG_MBWU_FLT 0x0820 /* mem-bw monitor filter */ +#define MSMON_CFG_MBWU_CTL 0x0828 /* mem-bw monitor config */ +#define MSMON_CSU 0x0840 /* current cache-usage */ +#define MSMON_CSU_CAPTURE 0x0848 /* last cache-usage value captured */ +#define MSMON_MBWU 0x0860 /* current mem-bw usage value */ +#define MSMON_MBWU_CAPTURE 0x0868 /* last mem-bw value captured */ +#define MSMON_MBWU_L 0x0880 /* current long mem-bw usage value */ +#define MSMON_MBWU_CAPTURE_L 0x0890 /* last long mem-bw value captured */ +#define MSMON_CAPT_EVNT 0x0808 /* signal a capture event */ +#define MPAMF_ESR 0x00F8 /* error status register */ +#define MPAMF_ECR 0x00F0 /* error control register */ + +/* MPAMF_IDR - MPAM features ID register */ +#define MPAMF_IDR_PARTID_MAX GENMASK(15, 0) +#define MPAMF_IDR_PMG_MAX GENMASK(23, 16) +#define MPAMF_IDR_HAS_CCAP_PART BIT(24) +#define MPAMF_IDR_HAS_CPOR_PART BIT(25) +#define MPAMF_IDR_HAS_MBW_PART BIT(26) +#define MPAMF_IDR_HAS_PRI_PART BIT(27) +#define MPAMF_IDR_EXT BIT(28) +#define MPAMF_IDR_HAS_IMPL_IDR BIT(29) +#define MPAMF_IDR_HAS_MSMON BIT(30) +#define MPAMF_IDR_HAS_PARTID_NRW BIT(31) +#define MPAMF_IDR_HAS_RIS BIT(32) +#define MPAMF_IDR_HAS_EXTD_ESR BIT(38) +#define MPAMF_IDR_HAS_ESR BIT(39) +#define MPAMF_IDR_RIS_MAX GENMASK(59, 56) + +/* MPAMF_MSMON_IDR - MPAM performance monitoring ID register */ +#define MPAMF_MSMON_IDR_MSMON_CSU BIT(16) +#define MPAMF_MSMON_IDR_MSMON_MBWU BIT(17) +#define MPAMF_MSMON_IDR_HAS_LOCAL_CAPT_EVNT BIT(31) + +/* MPAMF_CPOR_IDR - MPAM features cache portion partitioning ID register */ +#define MPAMF_CPOR_IDR_CPBM_WD GENMASK(15, 0) + +/* MPAMF_CCAP_IDR 
- MPAM features cache capacity partitioning ID register */ +#define MPAMF_CCAP_IDR_CMAX_WD GENMASK(5, 0) +#define MPAMF_CCAP_IDR_CASSOC_WD GENMASK(12, 8) +#define MPAMF_CCAP_IDR_HAS_CASSOC BIT(28) +#define MPAMF_CCAP_IDR_HAS_CMIN BIT(29) +#define MPAMF_CCAP_IDR_NO_CMAX BIT(30) +#define MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM BIT(31) + +/* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */ +#define MPAMF_MBW_IDR_BWA_WD GENMASK(5, 0) +#define MPAMF_MBW_IDR_HAS_MIN BIT(10) +#define MPAMF_MBW_IDR_HAS_MAX BIT(11) +#define MPAMF_MBW_IDR_HAS_PBM BIT(12) +#define MPAMF_MBW_IDR_HAS_PROP BIT(13) +#define MPAMF_MBW_IDR_WINDWR BIT(14) +#define MPAMF_MBW_IDR_BWPBM_WD GENMASK(28, 16) + +/* MPAMF_PRI_IDR - MPAM features priority partitioning ID register */ +#define MPAMF_PRI_IDR_HAS_INTPRI BIT(0) +#define MPAMF_PRI_IDR_INTPRI_0_IS_LOW BIT(1) +#define MPAMF_PRI_IDR_INTPRI_WD GENMASK(9, 4) +#define MPAMF_PRI_IDR_HAS_DSPRI BIT(16) +#define MPAMF_PRI_IDR_DSPRI_0_IS_LOW BIT(17) +#define MPAMF_PRI_IDR_DSPRI_WD GENMASK(25, 20) + +/* MPAMF_CSUMON_IDR - MPAM cache storage usage monitor ID register */ +#define MPAMF_CSUMON_IDR_NUM_MON GENMASK(15, 0) +#define MPAMF_CSUMON_IDR_HAS_OFLOW_CAPT BIT(24) +#define MPAMF_CSUMON_IDR_HAS_CEVNT_OFLW BIT(25) +#define MPAMF_CSUMON_IDR_HAS_OFSR BIT(26) +#define MPAMF_CSUMON_IDR_HAS_OFLOW_LNKG BIT(27) +#define MPAMF_CSUMON_IDR_HAS_XCL BIT(29) +#define MPAMF_CSUMON_IDR_CSU_RO BIT(30) +#define MPAMF_CSUMON_IDR_HAS_CAPTURE BIT(31) + +/* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */ +#define MPAMF_MBWUMON_IDR_NUM_MON GENMASK(15, 0) +#define MPAMF_MBWUMON_IDR_HAS_RWBW BIT(28) +#define MPAMF_MBWUMON_IDR_LWD BIT(29) +#define MPAMF_MBWUMON_IDR_HAS_LONG BIT(30) +#define MPAMF_MBWUMON_IDR_HAS_CAPTURE BIT(31) + +/* MPAMF_PARTID_NRW_IDR - MPAM PARTID narrowing ID register */ +#define MPAMF_PARTID_NRW_IDR_INTPARTID_MAX GENMASK(15, 0) + +/* MPAMF_IIDR - MPAM implementation ID register */ +#define MPAMF_IIDR_IMPLEMENTER GENMASK(11, 0) +#define MPAMF_IIDR_REVISION GENMASK(15, 12) +#define MPAMF_IIDR_VARIANT GENMASK(19, 16) +#define MPAMF_IIDR_PRODUCTID GENMASK(31, 20) + +#define MPAMF_IIDR_IMPLEMENTER_SHIFT 0 +#define MPAMF_IIDR_REVISION_SHIFT 12 +#define MPAMF_IIDR_VARIANT_SHIFT 16 +#define MPAMF_IIDR_PRODUCTID_SHIFT 20 + +/* MPAMF_AIDR - MPAM architecture ID register */ +#define MPAMF_AIDR_ARCH_MINOR_REV GENMASK(3, 0) +#define MPAMF_AIDR_ARCH_MAJOR_REV GENMASK(7, 4) + +/* MPAMCFG_PART_SEL - MPAM partition configuration selection register */ +#define MPAMCFG_PART_SEL_PARTID_SEL GENMASK(15, 0) +#define MPAMCFG_PART_SEL_INTERNAL BIT(16) +#define MPAMCFG_PART_SEL_RIS GENMASK(27, 24) + +/* MPAMCFG_CASSOC - MPAM cache maximum associativity partition configuration register */ +#define MPAMCFG_CASSOC_CASSOC GENMASK(15, 0) + +/* MPAMCFG_CMAX - MPAM cache capacity configuration register */ +#define MPAMCFG_CMAX_SOFTLIM BIT(31) +#define MPAMCFG_CMAX_CMAX GENMASK(15, 0) + +/* MPAMCFG_CMIN - MPAM cache capacity configuration register */ +#define MPAMCFG_CMIN_CMIN GENMASK(15, 0) + +/* + * MPAMCFG_MBW_MIN - MPAM memory minimum bandwidth partitioning configuration + * register + */ +#define MPAMCFG_MBW_MIN_MIN GENMASK(15, 0) + +/* + * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration + * register + */ +#define MPAMCFG_MBW_MAX_MAX_NR_BITS 16 +#define MPAMCFG_MBW_MAX_MAX GENMASK(15, 0) +#define MPAMCFG_MBW_MAX_HARDLIM BIT(31) + +/* + * MPAMCFG_MBW_WINWD - MPAM memory bandwidth partitioning window width + * register + */ +#define 
MPAMCFG_MBW_WINWD_US_FRAC GENMASK(7, 0) +#define MPAMCFG_MBW_WINWD_US_INT GENMASK(23, 8) + +/* MPAMCFG_PRI - MPAM priority partitioning configuration register */ +#define MPAMCFG_PRI_INTPRI GENMASK(15, 0) +#define MPAMCFG_PRI_DSPRI GENMASK(31, 16) + +/* + * MPAMCFG_MBW_PROP - Memory bandwidth proportional stride partitioning + * configuration register + */ +#define MPAMCFG_MBW_PROP_STRIDEM1 GENMASK(15, 0) +#define MPAMCFG_MBW_PROP_EN BIT(31) + +/* + * MPAMCFG_INTPARTID - MPAM internal partition narrowing configuration register + */ +#define MPAMCFG_INTPARTID_INTPARTID GENMASK(15, 0) +#define MPAMCFG_INTPARTID_INTERNAL BIT(16) + +/* MSMON_CFG_MON_SEL - Memory system performance monitor selection register */ +#define MSMON_CFG_MON_SEL_MON_SEL GENMASK(15, 0) +#define MSMON_CFG_MON_SEL_RIS GENMASK(27, 24) + +/* MPAMF_ESR - MPAM Error Status Register */ +#define MPAMF_ESR_PARTID_MON GENMASK(15, 0) +#define MPAMF_ESR_PMG GENMASK(23, 16) +#define MPAMF_ESR_ERRCODE GENMASK(27, 24) +#define MPAMF_ESR_OVRWR BIT(31) +#define MPAMF_ESR_RIS GENMASK(35, 32) + +/* MPAMF_ECR - MPAM Error Control Register */ +#define MPAMF_ECR_INTEN BIT(0) + +/* Error conditions in accessing memory mapped registers */ +#define MPAM_ERRCODE_NONE 0 +#define MPAM_ERRCODE_PARTID_SEL_RANGE 1 +#define MPAM_ERRCODE_REQ_PARTID_RANGE 2 +#define MPAM_ERRCODE_MSMONCFG_ID_RANGE 3 +#define MPAM_ERRCODE_REQ_PMG_RANGE 4 +#define MPAM_ERRCODE_MONITOR_RANGE 5 +#define MPAM_ERRCODE_INTPARTID_RANGE 6 +#define MPAM_ERRCODE_UNEXPECTED_INTERNAL 7 +#define MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL 8 +#define MPAM_ERRCODE_RIS_NO_CONTROL 9 +#define MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL 10 +#define MPAM_ERRCODE_RIS_NO_MONITOR 11 + +/* + * MSMON_CFG_CSU_CTL - Memory system performance monitor configure cache storage + * usage monitor control register + * MSMON_CFG_MBWU_CTL - Memory system performance monitor configure memory + * bandwidth usage monitor control register + */ +#define MSMON_CFG_x_CTL_TYPE GENMASK(7, 0) +#define MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L BIT(15) +#define MSMON_CFG_x_CTL_MATCH_PARTID BIT(16) +#define MSMON_CFG_x_CTL_MATCH_PMG BIT(17) +#define MSMON_CFG_x_CTL_SUBTYPE GENMASK(22, 20) +#define MSMON_CFG_x_CTL_OFLOW_FRZ BIT(24) +#define MSMON_CFG_x_CTL_OFLOW_INTR BIT(25) +#define MSMON_CFG_x_CTL_OFLOW_STATUS BIT(26) +#define MSMON_CFG_x_CTL_CAPT_RESET BIT(27) +#define MSMON_CFG_x_CTL_CAPT_EVNT GENMASK(30, 28) +#define MSMON_CFG_x_CTL_EN BIT(31) + +#define MSMON_CFG_MBWU_CTL_TYPE_MBWU 0x42 +#define MSMON_CFG_CSU_CTL_TYPE_CSU 0x43 + +#define MSMON_CFG_MBWU_CTL_SCLEN BIT(19) + +/* + * MSMON_CFG_CSU_FLT - Memory system performance monitor configure cache storage + * usage monitor filter register + * MSMON_CFG_MBWU_FLT - Memory system performance monitor configure memory + * bandwidth usage monitor filter register + */ +#define MSMON_CFG_x_FLT_PARTID GENMASK(15, 0) +#define MSMON_CFG_x_FLT_PMG GENMASK(23, 16) + +#define MSMON_CFG_MBWU_FLT_RWBW GENMASK(31, 30) +#define MSMON_CFG_CSU_FLT_XCL BIT(31) + +/* + * MSMON_CSU - Memory system performance monitor cache storage usage monitor + * register + * MSMON_CSU_CAPTURE - Memory system performance monitor cache storage usage + * capture register + * MSMON_MBWU - Memory system performance monitor memory bandwidth usage + * monitor register + * MSMON_MBWU_CAPTURE - Memory system performance monitor memory bandwidth usage + * capture register + */ +#define MSMON___VALUE GENMASK(30, 0) +#define MSMON___NRDY BIT(31) +#define MSMON___NRDY_L BIT(63) +#define MSMON___L_VALUE GENMASK(43, 0) +#define 
MSMON___LWD_VALUE GENMASK(62, 0) + +/* + * MSMON_CAPT_EVNT - Memory system performance monitoring capture event + * generation register + */ +#define MSMON_CAPT_EVNT_NOW BIT(0) + +#endif /* MPAM_INTERNAL_H */ diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c new file mode 100644 index 0000000000000..8e87afa90656a --- /dev/null +++ b/drivers/resctrl/mpam_resctrl.c @@ -0,0 +1,2503 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mpam_internal.h" + +DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); + +/* + * The classes we've picked to map to resctrl resources, wrapped + * in with their resctrl structure. + * Class pointer may be NULL. + */ +static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; + +/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ +static DEFINE_MUTEX(domain_list_lock); + +/* + * The classes we've picked to map to resctrl events. + * Resctrl believes all the worlds a Xeon, and these are all on the L3. This + * array lets us find the actual class backing the event counters. e.g. + * the only memory bandwidth counters may be on the memory controller, but to + * make use of them, we pretend they are on L3. + * Class pointer may be NULL. + */ +static struct mpam_resctrl_mon mpam_resctrl_counters[QOS_NUM_EVENTS]; + +static bool exposed_alloc_capable; +static bool exposed_mon_capable; + +/* + * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1. + * This applies globally to all traffic the CPU generates. + */ +static bool cdp_enabled; + +/* + * To support CPU-less NUMA nodes, user-space needs to opt in to the MB + * domain IDs being the NUMA nid instead of the corresponding CPU's L3 + * cache-id. + */ +static bool mb_uses_numa_nid; +static bool mb_numa_nid_possible; +static bool mb_l3_cache_id_possible; +/* + * If resctrl_init() succeeded, resctrl_exit() can be used to remove support + * for the filesystem in the event of an error. + */ +static bool resctrl_enabled; + +/* + * mpam_resctrl_pick_caches() needs to know the size of the caches. cacheinfo + * populates this from a device_initcall(). mpam_resctrl_setup() must wait. + */ +static bool cacheinfo_ready; +static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); + +/* + * L3 local/total may come from different classes - what is the number of MBWU + * 'on L3'? + */ +static unsigned int l3_num_allocated_mbwu = ~0; + +/* Whether this num_mbw_mon could result in a free_running system */ +static int __mpam_monitors_free_running(u16 num_mbwu_mon) +{ + if (num_mbwu_mon >= resctrl_arch_system_num_rmid_idx()) + return resctrl_arch_system_num_rmid_idx(); + return 0; +} + +/* + * If l3_num_allocated_mbwu is forced below PARTID * PMG, then the counters + * are not free running, and ABMC's user-interface must be used to assign them. 
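 *
 * [Illustrative aside, not part of this patch] A worked example with made-up
 * numbers: if mpam_partid_max is 31 and mpam_pmg_max is 3, then
 * resctrl_arch_system_num_rmid_idx() is (31 + 1) << fls(3) = 128. A platform
 * with 128 or more usable MBWU monitors can run them free-running, one per
 * (closid, rmid) pair. With, say, only 32 monitors, l3_num_allocated_mbwu
 * ends up at 32, mpam_resctrl_abmc_enabled() returns true, and counters must
 * be explicitly assigned through the ABMC interface.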
+ */
+static bool mpam_resctrl_abmc_enabled(void)
+{
+	return l3_num_allocated_mbwu < resctrl_arch_system_num_rmid_idx();
+}
+
+bool resctrl_arch_alloc_capable(void)
+{
+	return exposed_alloc_capable;
+}
+
+bool resctrl_arch_mon_capable(void)
+{
+	return exposed_mon_capable;
+}
+
+bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid)
+{
+	switch (rid) {
+	case RDT_RESOURCE_L2:
+	case RDT_RESOURCE_L3:
+		return cdp_enabled;
+	case RDT_RESOURCE_MBA:
+	default:
+		/*
+		 * x86's MBA control doesn't support CDP, so user-space doesn't
+		 * expect it.
+		 */
+		return false;
+	}
+}
+
+/**
+ * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks.
+ *
+ * At boot, all existing tasks use partid zero for D and I.
+ * To enable/disable CDP emulation, all these tasks need relabelling.
+ */
+static void resctrl_reset_task_closids(void)
+{
+	struct task_struct *p, *t;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t) {
+		resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID,
+					     RESCTRL_RESERVED_RMID);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+static void mpam_resctrl_monitor_sync_abmc_vals(struct rdt_resource *l3)
+{
+	l3->mon.num_mbm_cntrs = l3_num_allocated_mbwu;
+	if (cdp_enabled)
+		l3->mon.num_mbm_cntrs /= 2;
+
+	if (l3->mon.num_mbm_cntrs) {
+		l3->mon.mbm_cntr_assignable = mpam_resctrl_abmc_enabled();
+		l3->mon.mbm_assign_on_mkdir = mpam_resctrl_abmc_enabled();
+	} else {
+		l3->mon.mbm_cntr_assignable = false;
+		l3->mon.mbm_assign_on_mkdir = false;
+	}
+}
+
+int resctrl_arch_set_cdp_enabled(enum resctrl_res_level ignored, bool enable)
+{
+	struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3];
+	struct rdt_resource *l3 = &res->resctrl_res;
+	u32 partid, partid_i, partid_d;
+	u64 regval;
+
+	cdp_enabled = enable;
+
+	partid = RESCTRL_RESERVED_CLOSID;
+
+	if (enable) {
+		/*
+		 * Match resctrl_arch_set_closid_rmid(): PARTID_D takes the
+		 * CDP_DATA index, PARTID_I takes the CDP_CODE index.
+		 */
+		partid_d = resctrl_get_config_index(partid, CDP_DATA);
+		partid_i = resctrl_get_config_index(partid, CDP_CODE);
+		regval = FIELD_PREP(MPAM0_EL1_PARTID_D, partid_d) |
+			 FIELD_PREP(MPAM0_EL1_PARTID_I, partid_i);
+	} else {
+		regval = FIELD_PREP(MPAM0_EL1_PARTID_D, partid) |
+			 FIELD_PREP(MPAM0_EL1_PARTID_I, partid);
+	}
+
+	resctrl_reset_task_closids();
+	mpam_resctrl_monitor_sync_abmc_vals(l3);
+
+	WRITE_ONCE(arm64_mpam_global_default, regval);
+
+	return 0;
+}
+
+static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid)
+{
+	return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid);
+}
+
+/*
+ * MSC may raise an error interrupt if it sees an out-of-range partid/pmg,
+ * and go on to truncate the value. Regardless of what the hardware supports,
+ * only the system-wide safe values can be used.
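 *
 * [Illustrative aside, not part of this patch] An example of the helpers
 * that follow, using made-up system-wide values. With mpam_partid_max = 63
 * and mpam_pmg_max = 3: resctrl_arch_get_num_closid() returns 64, fls(3) is
 * 2, and resctrl_arch_rmid_idx_encode(closid, rmid) packs the pair as
 * (closid << 2) | rmid, e.g. closid 5, rmid 2 gives idx 22.
 * resctrl_arch_rmid_idx_decode(22, ...) recovers closid 5 and rmid 2.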
+ */ +u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) +{ + return mpam_partid_max + 1; +} + +u32 resctrl_arch_system_num_rmid_idx(void) +{ + u8 closid_shift = fls(mpam_pmg_max); + u32 num_partid = resctrl_arch_get_num_closid(NULL); + + return num_partid << closid_shift; +} + +u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) +{ + u8 closid_shift = fls(mpam_pmg_max); + + WARN_ON_ONCE(closid_shift > 8); + + return (closid << closid_shift) | rmid; +} + +void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) +{ + u8 closid_shift = fls(mpam_pmg_max); + u32 pmg_mask = ~(~0 << closid_shift); + + WARN_ON_ONCE(closid_shift > 8); + + *closid = idx >> closid_shift; + *rmid = idx & pmg_mask; +} + +void resctrl_arch_sched_in(struct task_struct *tsk) +{ + lockdep_assert_preemption_disabled(); + + mpam_thread_switch(tsk); +} + +void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid) +{ + WARN_ON_ONCE(closid > U16_MAX); + WARN_ON_ONCE(rmid > U8_MAX); + + if (!cdp_enabled) { + mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid); + } else { + /* + * When CDP is enabled, resctrl halves the closid range and we + * use odd/even partid for one closid. + */ + u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); + u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); + + mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid); + } +} + +void resctrl_arch_sync_cpu_closid_rmid(void *info) +{ + struct resctrl_cpu_defaults *r = info; + + lockdep_assert_preemption_disabled(); + + if (r) { + resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(), + r->closid, r->rmid); + } + + resctrl_arch_sched_in(current); +} + +void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) +{ + WARN_ON_ONCE(closid > U16_MAX); + WARN_ON_ONCE(rmid > U8_MAX); + + if (!cdp_enabled) { + mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid); + } else { + u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); + u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); + + mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid); + } +} + +bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) +{ + u64 regval = mpam_get_regval(tsk); + u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); + + if (cdp_enabled) + tsk_closid >>= 1; + + return tsk_closid == closid; +} + +/* The task's pmg is not unique, the partid must be considered too */ +bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) +{ + u64 regval = mpam_get_regval(tsk); + u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); + u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval); + + if (cdp_enabled) + tsk_closid >>= 1; + + return (tsk_closid == closid) && (tsk_rmid == rmid); +} + +int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid) +{ + u16 partid; + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return 0; + + if (cdp_enabled) + partid = closid << 1; + else + partid = closid; + + return iommu_group_set_qos_params(group, partid, rmid); +} + +bool resctrl_arch_match_iommu_closid(struct iommu_group *group, u32 closid) +{ + u16 partid; + int err = iommu_group_get_qos_params(group, &partid, NULL); + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return false; + + if (err) + return false; + + if (cdp_enabled) + partid >>= 1; + + return (partid == closid); +} + +bool resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + u8 pmg; + u16 partid; + int err = 
iommu_group_get_qos_params(group, &partid, &pmg); + + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU)) + return false; + + if (err) + return false; + + if (cdp_enabled) + partid >>= 1; + + return (partid == closid) && (rmid == pmg); +} + +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) +{ + if (l >= RDT_NUM_RESOURCES) + return NULL; + + return &mpam_resctrl_controls[l].resctrl_res; +} + +static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mpam_is_enabled()) + return -EINVAL; + + if (!mon->class) + return -EINVAL; + + switch (evtid) { + case QOS_L3_OCCUP_EVENT_ID: + /* With CDP, one monitor gets used for both code/data reads */ + return mpam_alloc_csu_mon(mon->class); + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + return USE_PRE_ALLOCATED; + default: + return -EOPNOTSUPP; + } +} + +void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, + enum resctrl_event_id evtid) +{ + DEFINE_WAIT(wait); + int *ret; + + ret = kmalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return ERR_PTR(-ENOMEM); + + do { + prepare_to_wait(&resctrl_mon_ctx_waiters, &wait, + TASK_INTERRUPTIBLE); + *ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid); + if (*ret == -ENOSPC) + schedule(); + } while (*ret == -ENOSPC && !signal_pending(current)); + finish_wait(&resctrl_mon_ctx_waiters, &wait); + + return ret; +} + +static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid, + u32 mon_idx) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mpam_is_enabled()) + return; + + if (!mon->class) + return; + + if (evtid == QOS_L3_OCCUP_EVENT_ID) + mpam_free_csu_mon(mon->class, mon_idx); + + wake_up(&resctrl_mon_ctx_waiters); +} + +void resctrl_arch_mon_ctx_free(struct rdt_resource *r, + enum resctrl_event_id evtid, void *arch_mon_ctx) +{ + u32 mon_idx = *(u32 *)arch_mon_ctx; + + kfree(arch_mon_ctx); + arch_mon_ctx = NULL; + + resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); +} + +static bool __resctrl_arch_mon_can_overflow(enum resctrl_event_id eventid) +{ + struct mpam_props *cprops; + struct mpam_class *class = mpam_resctrl_counters[eventid].class; + + if (!class) + return false; + + /* No need to worry about a 63 bit counter overflowing */ + cprops = &class->props; + return !mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops); +} + +bool resctrl_arch_mon_can_overflow(void) +{ + if (__resctrl_arch_mon_can_overflow(QOS_L3_MBM_LOCAL_EVENT_ID)) + return true; + + return __resctrl_arch_mon_can_overflow(QOS_L3_MBM_TOTAL_EVENT_ID); +} + +static int +__read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + enum mpam_device_features mon_type, enum mon_filter_options mon_opts, + int mon_idx, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) +{ + struct mon_cfg cfg = { }; + + if (!mpam_is_enabled()) + return -EINVAL; + + /* Shift closid to account for CDP */ + closid = resctrl_get_config_index(closid, cdp_type); + + if (mon_idx == USE_PRE_ALLOCATED) { + int mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid); + mon_idx = mon->mbwu_idx_to_mon[mbwu_idx]; + if (mon_idx == -1) { + if (mpam_resctrl_abmc_enabled()) { + /* Report Unassigned */ + return -ENOENT; + } + /* Report Unavailable */ + return -EINVAL; + } + } + + cfg.mon = mon_idx; + cfg.match_pmg = true; + cfg.partid = closid; + cfg.pmg = rmid; + cfg.opts = mon_opts; + + if (irqs_disabled()) { + /* Check if we can access this domain without an IPI */ + return -EIO; + } + + 
	return mpam_msmon_read(mon_comp, &cfg, mon_type, val);
+}
+
+static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
+			     enum mpam_device_features mon_type, enum mon_filter_options mon_opts,
+			     int mon_idx, u32 closid, u32 rmid, u64 *val)
+{
+	if (cdp_enabled) {
+		u64 cdp_val = 0;
+		int err;
+
+		err = __read_mon(mon, mon_comp, mon_type, mon_opts, mon_idx,
+				 CDP_CODE, closid, rmid, &cdp_val);
+		if (err)
+			return err;
+
+		err = __read_mon(mon, mon_comp, mon_type, mon_opts, mon_idx,
+				 CDP_DATA, closid, rmid, &cdp_val);
+		if (!err)
+			*val += cdp_val;
+		return err;
+	}
+
+	return __read_mon(mon, mon_comp, mon_type, mon_opts, mon_idx,
+			  CDP_NONE, closid, rmid, val);
+}
+
+static enum mon_filter_options resctrl_evt_config_to_mpam(u32 local_evt_cfg)
+{
+	switch (local_evt_cfg) {
+	case READS_TO_LOCAL_MEM:
+		return COUNT_READ;
+	case NON_TEMP_WRITE_TO_LOCAL_MEM:
+		return COUNT_WRITE;
+	default:
+		return COUNT_BOTH;
+	}
+}
+
+/* MBWU when not in ABMC mode, and CSU counters. */
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
+			   u64 *val, void *arch_mon_ctx)
+{
+	struct mpam_resctrl_dom *l3_dom;
+	struct mpam_component *mon_comp;
+	enum mon_filter_options mon_opts;
+	u32 mon_idx = *(u32 *)arch_mon_ctx;
+	enum mpam_device_features mon_type;
+	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
+
+	resctrl_arch_rmid_read_context_check();
+
+	if (!mpam_is_enabled())
+		return -EINVAL;
+
+	if (eventid >= QOS_NUM_EVENTS || !mon->class)
+		return -EINVAL;
+
+	l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom);
+	mon_comp = l3_dom->mon_comp[eventid];
+	mon_opts = resctrl_evt_config_to_mpam(l3_dom->mbm_local_evt_cfg);
+
+	switch (eventid) {
+	case QOS_L3_OCCUP_EVENT_ID:
+		mon_type = mpam_feat_msmon_csu;
+		break;
+	case QOS_L3_MBM_LOCAL_EVENT_ID:
+	case QOS_L3_MBM_TOTAL_EVENT_ID:
+		mon_type = mpam_feat_msmon_mbwu;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_opts, mon_idx,
+				 closid, rmid, val);
+}
+
+/* MBWU counters when in ABMC mode */
+int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d,
+			   u32 closid, u32 rmid, int mon_idx,
+			   enum resctrl_event_id eventid, u64 *val)
+{
+	struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid];
+	enum mon_filter_options mon_opts;
+	struct mpam_resctrl_dom *l3_dom;
+	struct mpam_component *mon_comp;
+
+	if (!mpam_is_enabled())
+		return -EINVAL;
+
+	if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class)
+		return -EINVAL;
+
+	l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom);
+	mon_comp = l3_dom->mon_comp[eventid];
+	mon_opts = resctrl_evt_config_to_mpam(l3_dom->mbm_local_evt_cfg);
+
+	return read_mon_cdp_safe(mon, mon_comp, mpam_feat_msmon_mbwu, mon_opts,
+				 mon_idx, closid, rmid, val);
+}
+
+static void __reset_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp,
+			int mon_idx,
+			enum resctrl_conf_type cdp_type, u32 closid, u32 rmid)
+{
+	struct mon_cfg cfg = { };
+
+	if (!mpam_is_enabled())
+		return;
+
+	/* Shift closid to account for CDP */
+	closid = resctrl_get_config_index(closid, cdp_type);
+
+	if (mon_idx == USE_PRE_ALLOCATED) {
+		int mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+
+		mon_idx = mon->mbwu_idx_to_mon[mbwu_idx];
+	}
+
+	if (mon_idx == -1)
+		return;
+
+	cfg.mon = mon_idx;
+	mpam_msmon_reset_mbwu(mon_comp, &cfg);
+}
+
+static void reset_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component
*mon_comp, + int mon_idx, u32 closid, u32 rmid) +{ + if (cdp_enabled) { + __reset_mon(mon, mon_comp, mon_idx, CDP_CODE, closid, rmid); + __reset_mon(mon, mon_comp, mon_idx, CDP_DATA, closid, rmid); + } else { + __reset_mon(mon, mon_comp, mon_idx, CDP_NONE, closid, rmid); + } +} + +/* Called via IPI. Call with read_cpus_lock() held. */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, enum resctrl_event_id eventid) +{ + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + + if (!mpam_is_enabled()) + return; + + /* Only MBWU counters are relevant, and for supported event types. */ + if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class) + return; + + l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_comp = l3_dom->mon_comp[eventid]; + + reset_mon_cdp_safe(mon, mon_comp, USE_PRE_ALLOCATED, closid, rmid); +} + +/* Reset an assigned counter */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; + + if (!mpam_is_enabled()) + return; + + if (eventid == QOS_L3_OCCUP_EVENT_ID || !mon->class) + return; + + l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_comp = l3_dom->mon_comp[eventid]; + + reset_mon_cdp_safe(mon, mon_comp, USE_PRE_ALLOCATED, closid, rmid); +} + +/* + * The rmid realloc threshold should be for the smallest cache exposed to + * resctrl. + */ +static void update_rmid_limits(unsigned int size) +{ + u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); + + if (WARN_ON_ONCE(!size)) + return; + + if (resctrl_rmid_realloc_limit && size > resctrl_rmid_realloc_limit) + return; + + resctrl_rmid_realloc_limit = size; + resctrl_rmid_realloc_threshold = size / num_unique_pmg; +} + +static bool cache_has_usable_cpor(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_cpor_part, cprops)) + return false; + + /* TODO: Scaling is not yet supported */ + /* resctrl uses u32 for all bitmap configurations */ + return (class->props.cpbm_wd <= 32); +} + +static bool cache_has_usable_cmax(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + return mpam_has_feature(mpam_feat_cmax_cmax, cprops); +} + +static bool mba_class_use_mbw_part(struct mpam_props *cprops) +{ + if (!mpam_has_feature(mpam_feat_mbw_part, cprops) || + cprops->mbw_pbm_bits < 1) + return false; + + /* u32 is used to represent MBW PBM bitmaps in the driver, for now: */ + return cprops->mbw_pbm_bits <= 32; +} + +static bool mba_class_use_mbw_max(struct mpam_props *cprops) +{ + return (mpam_has_feature(mpam_feat_mbw_max, cprops) && + cprops->bwa_wd); +} + +static bool class_has_usable_mba(struct mpam_props *cprops) +{ + return mba_class_use_mbw_part(cprops) || mba_class_use_mbw_max(cprops); +} + +static bool cache_has_usable_csu(struct mpam_class *class) +{ + struct mpam_props *cprops; + + if (!class) + return false; + + cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return false; + + /* + * CSU counters settle on the value, so we can get away with + * having only one. 
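 *
 * [Illustrative aside, not part of this patch] A simplified sketch of why a
 * single monitor can be shared, based on the alloc/read/free helpers used
 * elsewhere in this file:
 *
 *	csu_mon = mpam_alloc_csu_mon(class);	// may have to wait for a free one
 *	// program the monitor for (partid, pmg), wait for not-ready to clear,
 *	// then read the settled occupancy value
 *	mpam_free_csu_mon(class, csu_mon);
 *
 * Because the occupancy count settles, re-programming the same monitor for
 * the next read still gives a meaningful value. MBWU counters must keep
 * counting between reads, so they cannot be time-shared this way.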
+ */ + if (!cprops->num_csu_mon) + return false; + + return (mpam_partid_max > 1) || (mpam_pmg_max != 0); +} + +static bool class_has_usable_mbwu(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) + return false; + + /* + * resctrl expects the bandwidth counters to be free running, + * which means we need as many monitors as resctrl has + * control/monitor groups. + */ + if (__mpam_monitors_free_running(cprops->num_mbwu_mon)) { + pr_debug("monitors usable in free-running mode\n"); + return true; + } + + if (cprops->num_mbwu_mon) { + pr_debug("monitors usable via ABMC assignment\n"); + return true; + } + + return false; +} + +/* + * Calculate the worst-case percentage change from each implemented step + * in the control. + */ +static u32 get_mba_granularity(struct mpam_props *cprops) +{ + if (mba_class_use_mbw_part(cprops)) { + return DIV_ROUND_UP(MAX_MBA_BW, cprops->mbw_pbm_bits); + } else if (mba_class_use_mbw_max(cprops)) { + /* + * bwa_wd is the number of bits implemented in the 0.xxx + * fixed point fraction. 1 bit is 50%, 2 is 25% etc. + */ + return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd); + } + + return 0; +} + +static u32 mbw_pbm_to_percent(const unsigned long mbw_pbm, + struct mpam_props *cprops) +{ + u32 val = bitmap_weight(&mbw_pbm, (unsigned int)cprops->mbw_pbm_bits); + + if (cprops->mbw_pbm_bits == 0) + return 0; + + val *= MAX_MBA_BW; + val = DIV_ROUND_CLOSEST(val, cprops->mbw_pbm_bits); + + return val; +} + +static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops) +{ + u32 val = pc; + unsigned long ret = 0; + + if (cprops->mbw_pbm_bits == 0) + return 0; + + val *= cprops->mbw_pbm_bits; + val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); + + /* TODO: pick bits at random to avoid contention */ + bitmap_set(&ret, 0, val); + return ret; +} + +/* + * Each fixed-point hardware value architecturally represents a range + * of values: the full range 0% - 100% is split contiguously into + * (1 << cprops->bwa_wd) equal bands. + * Find the nearest percentage value to the upper bound of the selected band: + */ +static u32 fract16_to_percent(u16 fract, u8 wd) +{ + u32 val = fract; + + val >>= 16 - wd; + val += 1; + val *= MAX_MBA_BW; + val = DIV_ROUND_CLOSEST(val, 1 << wd); + + return val; +} + +/* + * Find the band whose upper bound is closest to the specified percentage. + * + * A round-to-nearest policy is followed here as a balanced compromise + * between unexpected under-commit of the resource (where the total of + * a set of resource allocations after conversion is less than the + * expected total, due to rounding of the individual converted + * percentages) and over-commit (where the total of the converted + * allocations is greater than expected). 
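 *
 * [Illustrative aside, not part of this patch] A worked example with
 * bwa_wd = 6 (value picked for illustration): percent_to_fract16(25, 6)
 * computes 25 << 6 = 1600, DIV_ROUND_CLOSEST(1600, 100) = 16,
 * max(16, 1) - 1 = 15, then 15 << 10 = 0x3c00. Converting back,
 * fract16_to_percent(0x3c00, 6) gives ((0x3c00 >> 10) + 1) * 100 / 64 = 25,
 * so the round trip is stable.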
+ */ +static u16 percent_to_fract16(u8 pc, u8 wd) +{ + u32 val = pc; + + val <<= wd; + val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); + val = max(val, 1) - 1; + val <<= 16 - wd; + + return val; +} + +static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) +{ + return fract16_to_percent(mbw_max, cprops->bwa_wd); +} + +static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) +{ + return percent_to_fract16(pc, cprops->bwa_wd); +} + +static u16 percent_to_cmax(u8 pc, struct mpam_props *cprops) +{ + return percent_to_fract16(pc, cprops->cmax_wd); +} + +static u32 get_mba_min(struct mpam_props *cprops) +{ + u32 val = 0; + + if (mba_class_use_mbw_part(cprops)) + val = mbw_pbm_to_percent(val, cprops); + else if (mba_class_use_mbw_max(cprops)) + val = mbw_max_to_percent(val, cprops); + else + WARN_ON_ONCE(1); + + return val; +} + +/* Find the L3 cache that has affinity with this CPU */ +static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask) +{ + int err; + u32 cache_id = get_cpu_cacheinfo_id(cpu, 3); + + lockdep_assert_cpus_held(); + + err = mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask); + return err; +} + +/* + * topology_matches_l3() - Is the provided class the same shape as L3 + * @victim: The class we'd like to pretend is L3. + * + * resctrl expects all the worlds a Xeon, and all counters are on the + * L3. We play fast and loose with this, mapping counters on other + * classes - provided the CPU->domain mapping is the same kind of shape. + * + * Using cacheinfo directly would make this work even if resctrl can't + * use the L3 - but cacheinfo can't tell us anything about offline CPUs. + * Using the L3 resctrl domain list also depends on CPUs being online. + * Using the mpam_class we picked for L3 so we can use its domain list + * assumes that there are MPAM controls on the L3. + * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id() + * helper. This relies on at least one CPU per L3 cache being online at + * boot. + * + * Walk the two component lists and compare the affinity masks. The topology + * matches if each victim:component has a corresponding L3:component with the + * same affinity mask. These lists/masks are computed from firmware tables so + * don't change at runtime. + */ +static bool topology_matches_l3(struct mpam_class *victim) +{ + int cpu, err; + struct mpam_component *victim_iter; + cpumask_var_t __free(free_cpumask_var) tmp_cpumask; + + if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) + return false; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(victim_iter, &victim->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (cpumask_empty(&victim_iter->affinity)) { + pr_debug("class %u has CPU-less component %u - can't match L3!\n", + victim->level, victim_iter->comp_id); + return false; + } + + cpu = cpumask_any(&victim_iter->affinity); + if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) + return false; + + cpumask_clear(tmp_cpumask); + err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); + if (err) { + pr_debug("Failed to find L3's equivalent component to class %u component %u\n", + victim->level, victim_iter->comp_id); + return false; + } + + /* Any differing bits in the affinity mask? 
*/ + if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) { + pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n" + "L3:%*pbl != victim:%*pbl\n", + victim->level, victim_iter->comp_id, + cpumask_pr_args(tmp_cpumask), + cpumask_pr_args(&victim_iter->affinity)); + + return false; + } + } + + return true; +} + +static bool topology_matches_numa(struct mpam_class *victim) +{ + /* + * For now, check this is a memory class, in which case component + * id are already NUMA nid. + */ + return (victim->type == MPAM_CLASS_MEMORY); +} + +/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ +static void mpam_resctrl_pick_caches(void) +{ + bool has_cpor, has_cmax; + struct mpam_class *class; + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + if (class->type != MPAM_CLASS_CACHE) { + pr_debug("class %u is not a cache\n", class->level); + continue; + } + + if (class->level != 2 && class->level != 3) { + pr_debug("class %u is not L2 or L3\n", class->level); + continue; + } + + has_cpor = cache_has_usable_cpor(class); + has_cmax = cache_has_usable_cmax(class); + if (!has_cpor && !has_cmax) { + pr_debug("class %u cache misses CPOR\n", class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u Class has missing CPUs\n", class->level); + pr_debug("class %u mask %*pb != %*pb\n", class->level, + cpumask_pr_args(&class->affinity), + cpumask_pr_args(cpu_possible_mask)); + continue; + } + + if (has_cpor) { + pr_debug("pick_caches: Class has CPOR\n"); + if (class->level == 2) + res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + res->class = class; + exposed_alloc_capable = true; + } + if (has_cmax) { + pr_debug("pick_caches: Class has CMAX\n"); + if (class->level == 2) + res = &mpam_resctrl_controls[RDT_RESOURCE_L2_MAX]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3_MAX]; + res->class = class; + exposed_alloc_capable = true; + } + } +} + +static void mpam_resctrl_pick_mba(void) +{ + struct mpam_class *class, *candidate_class = NULL; + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_props *cprops = &class->props; + bool l3_cache_id_possible = false; + bool numa_nid_possible = false; + + if (class->level < 3) { + pr_debug("class %u is before L3\n", class->level); + continue; + } + + if (!class_has_usable_mba(cprops)) { + pr_debug("class %u has no bandwidth control\n", class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u has missing CPUs\n", class->level); + continue; + } + + if (topology_matches_numa(class)) { + pr_debug("class %u topology matches NUMA domains\n", class->level); + numa_nid_possible = true; + } + + if (topology_matches_l3(class)) { + pr_debug("class %u topology matches L3\n", class->level); + l3_cache_id_possible = true; + } + + if (!l3_cache_id_possible && !numa_nid_possible) { + pr_debug("class %u has no matching topology for MB\n", class->level); + continue; + } + + /* + * mba_sc reads the mbm_local counter, and waggles the MBA controls. + * mbm_local is implicitly part of the L3, pick a resource to be MBA + * that as close as possible to the L3. 
+ */ + if (!candidate_class || class->level < candidate_class->level) { + /* + * Refuse to pick a closer class if it would prevent cache-id + * being used as domain-id by default. + */ + if (!candidate_class || l3_cache_id_possible) { + candidate_class = class; + mb_l3_cache_id_possible = l3_cache_id_possible; + mb_numa_nid_possible = numa_nid_possible; + } + } + } + + if (candidate_class) { + pr_debug("selected class %u to back MBA\n", candidate_class->level); + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + res->class = candidate_class; + exposed_alloc_capable = true; + } +} + +static void __free_mbwu_mon(struct mpam_class *class, int *array, + u16 num_mbwu_mon) +{ + for (int i = 0; i < num_mbwu_mon; i++) { + if (array[i] < 0) + continue; + + mpam_free_mbwu_mon(class, array[i]); + array[i] = ~0; + } +} + +static int __alloc_mbwu_mon(struct mpam_class *class, int *array, + u16 num_mbwu_mon) +{ + for (int i = 0; i < num_mbwu_mon; i++) { + int mbwu_mon = mpam_alloc_mbwu_mon(class); + + if (mbwu_mon < 0) { + __free_mbwu_mon(class, array, num_mbwu_mon); + return mbwu_mon; + } + array[i] = mbwu_mon; + } + + l3_num_allocated_mbwu = min(l3_num_allocated_mbwu, num_mbwu_mon); + + return 0; +} + +static int *__alloc_mbwu_array(struct mpam_class *class, u16 num_mbwu_mon) +{ + int err; + size_t array_size = num_mbwu_mon * sizeof(int); + int *array __free(kfree) = kmalloc(array_size, GFP_KERNEL); + + if (!array) + return ERR_PTR(-ENOMEM); + + memset(array, -1, array_size); + + err = __alloc_mbwu_mon(class, array, num_mbwu_mon); + if (err) + return ERR_PTR(err); + return_ptr(array); +} + +static void counter_update_class(enum resctrl_event_id evt_id, + struct mpam_class *class) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evt_id]; + struct mpam_class *existing_class = mon->class; + u16 num_mbwu_mon = class->props.num_mbwu_mon; + int *existing_array = mon->mbwu_idx_to_mon; + + if (existing_class) { + if (class->level == 3) { + pr_debug("Existing class is L3 - L3 wins\n"); + return; + } else if (existing_class->level < class->level) { + pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n", + existing_class->level, class->level); + return; + } + } + + pr_debug("Updating event %u to use class %u\n", evt_id, class->level); + mon->class = class; + exposed_mon_capable = true; + + if (evt_id == QOS_L3_OCCUP_EVENT_ID) + return; + + /* Might not need all the monitors */ + num_mbwu_mon = __mpam_monitors_free_running(num_mbwu_mon); + if (!num_mbwu_mon) { + pr_debug("Not pre-allocating free-running counters\n"); + return; + } + + /* + * This is the pre-allocated free-running monitors path. It always + * allocates one monitor per PARTID * PMG. 
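 *
 * [Illustrative aside, not part of this patch] Continuing the made-up
 * example used earlier: with 128 usable (closid, rmid) pairs, this path
 * allocates 128 MBWU monitors up front, so every mbwu_idx_to_mon[] entry
 * holds a valid monitor, reads never see -1, and
 * mpam_resctrl_abmc_enabled() stays false so the assignment interface is
 * not exposed.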
+ */ + WARN_ON_ONCE(num_mbwu_mon != resctrl_arch_system_num_rmid_idx()); + + mon->mbwu_idx_to_mon = __alloc_mbwu_array(class, num_mbwu_mon); + if (IS_ERR(mon->mbwu_idx_to_mon)) { + pr_debug("Failed to allocate MBWU array\n"); + mon->class = existing_class; + mon->mbwu_idx_to_mon = existing_array; + return; + } + + if (existing_array) { + pr_debug("Releasing previous class %u's monitors\n", + existing_class->level); + __free_mbwu_mon(existing_class, existing_array, num_mbwu_mon); + kfree(existing_array); + } +} + +static void mpam_resctrl_pick_counters(void) +{ + struct mpam_class *class; + unsigned int cache_size; + bool has_csu, has_mbwu; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_props *cprops = &class->props; + + if (class->level < 3) { + pr_debug("class %u is before L3", class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u does not cover all CPUs", class->level); + continue; + } + + has_csu = cache_has_usable_csu(class); + if (has_csu && topology_matches_l3(class)) { + pr_debug("class %u has usable CSU, and matches L3 topology", class->level); + + /* CSU counters only make sense on a cache. */ + switch (class->type) { + case MPAM_CLASS_CACHE: + /* Assume cache levels are the same size for all CPUs... */ + cache_size = get_cpu_cacheinfo_size(smp_processor_id(), + class->level); + if (!cache_size) { + pr_debug("Could not read cache size for class %u\n", + class->level); + continue; + } + + if (mpam_has_feature(mpam_feat_msmon_csu, cprops)) + update_rmid_limits(cache_size); + + counter_update_class(QOS_L3_OCCUP_EVENT_ID, class); + return; + default: + return; + } + } + + has_mbwu = class_has_usable_mbwu(class); + if (has_mbwu && topology_matches_l3(class)) { + pr_debug("class %u has usable MBWU, and matches L3 topology", class->level); + + /* + * MBWU counters may be 'local' or 'total' depending on + * where they are in the topology. Counters on caches + * are assumed to be local. If it's on the memory + * controller, its assumed to be global. + * TODO: check mbm_local matches NUMA boundaries... + */ + switch (class->type) { + case MPAM_CLASS_CACHE: + counter_update_class(QOS_L3_MBM_LOCAL_EVENT_ID, + class); + break; + case MPAM_CLASS_MEMORY: + counter_update_class(QOS_L3_MBM_TOTAL_EVENT_ID, + class); + break; + default: + break; + } + } + } + + /* Allocation of MBWU monitors assumes that the class is unique... 
*/ + if (mpam_resctrl_counters[QOS_L3_MBM_LOCAL_EVENT_ID].class) + WARN_ON_ONCE(mpam_resctrl_counters[QOS_L3_MBM_LOCAL_EVENT_ID].class == + mpam_resctrl_counters[QOS_L3_MBM_TOTAL_EVENT_ID].class); +} + +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + struct mpam_class *class; + struct mpam_props *cprops; + + class = mpam_resctrl_counters[evt].class; + if (!class) + return false; + + cprops = &class->props; + + return mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, cprops); +} + +void resctrl_arch_mon_event_config_read(void *info) +{ + struct mpam_resctrl_dom *dom; + struct resctrl_mon_config_info *mon_info = info; + + if (!mpam_is_enabled()) { + mon_info->mon_config = 0; + return; + } + + dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_mon_dom); + mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS; +} + +void resctrl_arch_mon_event_config_write(void *info) +{ + struct mpam_resctrl_dom *dom; + struct resctrl_mon_config_info *mon_info = info; + + WARN_ON_ONCE(mon_info->mon_config & ~MPAM_RESTRL_EVT_CONFIG_VALID); + + dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_mon_dom); + + if (!mpam_is_enabled()) { + dom->mbm_local_evt_cfg = 0; + return; + } + + dom->mbm_local_evt_cfg = mon_info->mon_config & MPAM_RESTRL_EVT_CONFIG_VALID; +} + +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + int i; + struct mpam_resctrl_dom *dom; + struct mpam_resctrl_mon *mon; + struct mpam_component *mon_comp; + + dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + if (!mpam_is_enabled()) { + dom->mbm_local_evt_cfg = 0; + return; + } + dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; + + /* + * Monitors may be backed by different classes of MSC, all + * possible components need to be reset... 
+ */ + for (i = 0; i < QOS_NUM_EVENTS; i++) { + mon = &mpam_resctrl_counters[i]; + if (!mon->class) + continue; // dummy resource + + mon_comp = dom->mon_comp[i]; + if (!mon_comp) + continue; + + mpam_msmon_reset_all_mbwu(mon_comp); + } +} + +static void __config_cntr(struct mpam_resctrl_mon *mon, u32 cntr_id, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, + bool assign) +{ + u32 mbwu_idx, mon_idx = resctrl_get_config_index(cntr_id, cdp_type); + + closid = resctrl_get_config_index(closid, cdp_type); + mbwu_idx = resctrl_arch_rmid_idx_encode(closid, rmid); + WARN_ON_ONCE(mon_idx > l3_num_allocated_mbwu); + + if (assign) + mon->mbwu_idx_to_mon[mbwu_idx] = mon->assigned_counters[mon_idx]; + else + mon->mbwu_idx_to_mon[mbwu_idx] = -1; +} + +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mon->mbwu_idx_to_mon || !mon->assigned_counters) { + pr_debug("monitor arrays not allocated\n"); + return; + } + + if (cdp_enabled) { + __config_cntr(mon, cntr_id, CDP_CODE, closid, rmid, assign); + __config_cntr(mon, cntr_id, CDP_DATA, closid, rmid, assign); + } else { + __config_cntr(mon, cntr_id, CDP_NONE, closid, rmid, assign); + } + + resctrl_arch_reset_rmid(r, d, closid, rmid, evtid); +} + +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) +{ + if (r != &mpam_resctrl_controls[RDT_RESOURCE_L3].resctrl_res) + return false; + + return mpam_resctrl_abmc_enabled(); +} + +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) +{ + lockdep_assert_cpus_held(); + + WARN_ON_ONCE(1); + + return 0; +} + +static int mpam_resctrl_control_init(struct mpam_resctrl_res *res, + enum resctrl_res_level type) +{ + struct mpam_class *class = res->class; + struct mpam_props *cprops = &class->props; + struct rdt_resource *r = &res->resctrl_res; + + switch (res->resctrl_res.rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + r->alloc_capable = true; + r->schema_fmt = RESCTRL_SCHEMA_BITMAP; + r->cache.arch_has_sparse_bitmasks = true; + + /* TODO: Scaling is not yet supported */ + r->cache.cbm_len = class->props.cpbm_wd; + /* mpam_devices will reject empty bitmaps */ + r->cache.min_cbm_bits = 1; + + if (r->rid == RDT_RESOURCE_L2) { + r->name = "L2"; + r->ctrl_scope = RESCTRL_L2_CACHE; + } else { + r->name = "L3"; + r->ctrl_scope = RESCTRL_L3_CACHE; + } + + /* + * Which bits are shared with other ...things... + * Unknown devices use partid-0 which uses all the bitmap + * fields. Until we configured the SMMU and GIC not to do this + * 'all the bits' is the correct answer here. 
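	 *
	 * [Illustrative aside, not part of this patch] A concrete example,
	 * assuming resctrl_get_resource_default_ctrl() returns the full
	 * bitmap for a bitmap-schema resource: with cpbm_wd = 12, cbm_len is
	 * 12 and shareable_bits becomes 0xfff, i.e. user-space is told every
	 * portion may be shared with traffic (SMMU, GIC) that is still using
	 * PARTID 0.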
+ */ + r->cache.shareable_bits = resctrl_get_resource_default_ctrl(r); + break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + r->alloc_capable = true; + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; + r->membw.min_bw = max(100 / (1 << cprops->cmax_wd), 1); + r->membw.bw_gran = max(100 / (1 << cprops->cmax_wd), 1); + r->membw.max_bw = 100; + + if (r->rid == RDT_RESOURCE_L2_MAX) { + r->name = "L2_MAX"; + r->ctrl_scope = RESCTRL_L2_CACHE; + } else { + r->name = "L3_MAX"; + r->ctrl_scope = RESCTRL_L3_CACHE; + } + + break; + case RDT_RESOURCE_MBA: + /* Domain ID is the L3 cache-id by default */ + if (mb_l3_cache_id_possible) + r->alloc_capable = true; + + r->schema_fmt = RESCTRL_SCHEMA_PERCENT; + r->ctrl_scope = RESCTRL_L3_CACHE; + + r->mba.delay_linear = true; + r->mba.throttle_mode = THREAD_THROTTLE_UNDEFINED; + r->membw.min_bw = get_mba_min(cprops); + r->membw.max_bw = MAX_MBA_BW; + r->membw.bw_gran = get_mba_granularity(cprops); + + r->name = "MB"; + + break; + default: + break; + } + + return 0; +} + +static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) +{ + bool is_mb; + struct mpam_class *class = comp->class; + + is_mb = (mpam_resctrl_controls[RDT_RESOURCE_MBA].class == class); + + if (is_mb && mb_uses_numa_nid && topology_matches_numa(class)) + return comp->comp_id; + + if (class->type == MPAM_CLASS_CACHE) + return comp->comp_id; + + if (topology_matches_l3(class)) { + /* Use the corresponding L3 component ID as the domain ID */ + int id = get_cpu_cacheinfo_id(cpu, 3); + + /* Implies topology_matches_l3() made a mistake */ + if (WARN_ON_ONCE(id == -1)) + return comp->comp_id; + + return id; + } + + /* + * Otherwise, expose the ID used by the firmware table code. + */ + return comp->comp_id; +} + +/* + * This must run after all event counters have been picked so that any free + * running counters have already been allocated. 
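+ * If mbwu_idx_to_mon is already set, the counters are free-running and
+ * there is nothing for ABMC-style assignment to do here.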
+ */ +static int mpam_resctrl_monitor_init_abmc(struct mpam_resctrl_mon *mon) +{ + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + size_t array_size = resctrl_arch_system_num_rmid_idx() * sizeof(int); + int *rmid_array __free(kfree) = kmalloc(array_size, GFP_KERNEL); + struct rdt_resource *l3 = &res->resctrl_res; + struct mpam_class *class = mon->class; + u16 num_mbwu_mon; + + if (mon->mbwu_idx_to_mon) { + pr_debug("monitors free running\n"); + return 0; + } + + if (!rmid_array) { + pr_debug("Failed to allocate RMID array\n"); + return -ENOMEM; + } + memset(rmid_array, -1, array_size); + + num_mbwu_mon = class->props.num_mbwu_mon; + mon->assigned_counters = __alloc_mbwu_array(mon->class, num_mbwu_mon); + if (IS_ERR(mon->assigned_counters)) + return PTR_ERR(mon->assigned_counters); + mon->mbwu_idx_to_mon = no_free_ptr(rmid_array); + + mpam_resctrl_monitor_sync_abmc_vals(l3); + + return 0; +} + +bool resctrl_arch_get_mb_uses_numa_nid(void) +{ + return mb_uses_numa_nid; +} + +int resctrl_arch_set_mb_uses_numa_nid(bool enabled) +{ + struct rdt_resource *r; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + struct rdt_ctrl_domain *ctrl_d; + + lockdep_assert_cpus_held(); + lockdep_assert_mems_held(); + + if (!mb_numa_nid_possible) + return -EOPNOTSUPP; + + if (mb_uses_numa_nid == enabled) + return 0; + + /* Domain IDs as NUMA nid is only defined for MBA */ + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + if (!res->class) + return -EOPNOTSUPP; + r = &res->resctrl_res; + + /* repaint the domain IDs */ + mb_uses_numa_nid = enabled; + list_for_each_entry(ctrl_d, &r->ctrl_domains, hdr.list) { + int cpu = cpumask_any(&ctrl_d->hdr.cpu_mask); + + dom = container_of(ctrl_d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + ctrl_d->hdr.id = mpam_resctrl_pick_domain_id(cpu, dom->ctrl_comp); + } + + /* monitor domains are unaffected and should continue to use the L3 */ + + if (!enabled && mb_l3_cache_id_possible) + r->alloc_capable = true; + else if (enabled && mb_numa_nid_possible) + r->alloc_capable = true; + else + r->alloc_capable = false; + + return 0; +} + +static void mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, + enum resctrl_event_id type) +{ + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct rdt_resource *l3 = &res->resctrl_res; + + lockdep_assert_cpus_held(); + + /* There also needs to be an L3 cache present */ + if (get_cpu_cacheinfo_id(smp_processor_id(), 3) == -1) + return; + + /* + * If there are no MPAM resources on L3, force it into existence. + * topology_matches_l3() already ensures this looks like the L3. + * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init(). + */ + if (!res->class) { + pr_warn_once("Faking L3 MSC to enable counters.\n"); + res->class = mpam_resctrl_counters[type].class; + } + + /* Called multiple times!, once per event type */ + if (exposed_mon_capable) { + l3->mon_capable = true; + + /* Setting name is necessary on monitor only platforms */ + l3->name = "L3"; + l3->mon_scope = RESCTRL_L3_CACHE; + + resctrl_enable_mon_event(type); + + /* + * Unfortunately, num_rmid doesn't mean anything for + * mpam, and its exposed to user-space! + * + * num-rmid is supposed to mean the minimum number of + * monitoring groups that can exist simultaneously, including + * the default monitoring group for each control group. + * + * For mpam, each control group has its own pmg/rmid space, so + * it is not appropriate to advertise the whole rmid_idx space + * here. 
But the pmgs corresponding to the parent control + * group can be allocated freely: + */ + l3->mon.num_rmid = mpam_pmg_max + 1;; + + switch (type) { + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + mpam_resctrl_monitor_init_abmc(mon); + l3->mon.mbm_cfg_mask = MPAM_RESTRL_EVT_CONFIG_VALID; + + return; + default: + return; + } + } +} + +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, + u32 closid, enum resctrl_conf_type type) +{ + u32 partid; + struct mpam_config *cfg; + struct mpam_props *cprops; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + enum mpam_device_features configured_by; + + lockdep_assert_cpus_held(); + + if (!mpam_is_enabled()) + goto err; + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + cprops = &res->class->props; + + /* + * When CDP is enabled, but the resource doesn't support it, + * the control is cloned across both partids. + * Pick one at random to read: + */ + if (mpam_resctrl_hide_cdp(r->rid)) + type = CDP_DATA; + + partid = resctrl_get_config_index(closid, type); + cfg = &dom->ctrl_comp->cfg[partid]; + + switch (r->rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + configured_by = mpam_feat_cpor_part; + break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + configured_by = mpam_feat_cmax_cmax; + break; + case RDT_RESOURCE_MBA: + if (mba_class_use_mbw_part(cprops)) { + configured_by = mpam_feat_mbw_part; + break; + } else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { + configured_by = mpam_feat_mbw_max; + break; + } + fallthrough; + default: + goto err; + } + + if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || + !mpam_has_feature(configured_by, cfg)) + goto err; + + switch (configured_by) { + case mpam_feat_cpor_part: + /* TODO: Scaling is not yet supported */ + return cfg->cpbm; + case mpam_feat_cmax_cmax: + return fract16_to_percent(cfg->cmax, cprops->cmax_wd); + case mpam_feat_mbw_part: + /* TODO: Scaling is not yet supported */ + return mbw_pbm_to_percent(cfg->mbw_pbm, cprops); + case mpam_feat_mbw_max: + return mbw_max_to_percent(cfg->mbw_max, cprops); + default: + goto err; + } + +err: + return resctrl_get_resource_default_ctrl(r); +} + +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val) +{ + int err; + u32 partid; + struct mpam_config cfg; + struct mpam_props *cprops; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + + lockdep_assert_cpus_held(); + lockdep_assert_irqs_enabled(); + + if (!mpam_is_enabled()) + return -EINVAL; + + /* + * NOTE: don't check the CPU as mpam_apply_config() doesn't care, + * and resctrl_arch_update_domains() depends on this. + */ + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + cprops = &res->class->props; + + partid = resctrl_get_config_index(closid, t); + if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) { + pr_debug("Not alloc capable or computed PARTID out of range\n"); + return -EINVAL; + } + + /* + * Copy the current config to avoid clearing other resources when the + * same component is exposed multiple times through resctrl. 
+ */ + cfg = dom->ctrl_comp->cfg[partid]; + + switch (r->rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + /* TODO: Scaling is not yet supported */ + cfg.cpbm = cfg_val; + mpam_set_feature(mpam_feat_cpor_part, &cfg); + break; + case RDT_RESOURCE_L2_MAX: + case RDT_RESOURCE_L3_MAX: + cfg.cmax = percent_to_cmax(cfg_val, cprops); + mpam_set_feature(mpam_feat_cmax_cmax, &cfg); + break; + case RDT_RESOURCE_MBA: + if (mba_class_use_mbw_part(cprops)) { + cfg.mbw_pbm = percent_to_mbw_pbm(cfg_val, cprops); + mpam_set_feature(mpam_feat_mbw_part, &cfg); + break; + } else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { + cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); + mpam_set_feature(mpam_feat_mbw_max, &cfg); + break; + } + fallthrough; + default: + return -EINVAL; + } + + /* + * When CDP is enabled, but the resource doesn't support it, we need to + * apply the same configuration to the other partid. + */ + if (mpam_resctrl_hide_cdp(r->rid)) { + partid = resctrl_get_config_index(closid, CDP_CODE); + err = mpam_apply_config(dom->ctrl_comp, partid, &cfg); + if (err) + return err; + + partid = resctrl_get_config_index(closid, CDP_DATA); + return mpam_apply_config(dom->ctrl_comp, partid, &cfg); + + } else { + return mpam_apply_config(dom->ctrl_comp, partid, &cfg); + } +} + +/* TODO: this is IPI heavy */ +int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) +{ + int err = 0; + enum resctrl_conf_type t; + struct rdt_ctrl_domain *d; + struct resctrl_staged_config *cfg; + + lockdep_assert_cpus_held(); + lockdep_assert_irqs_enabled(); + + if (!mpam_is_enabled()) + return -EINVAL; + + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + for (t = 0; t < CDP_NUM_TYPES; t++) { + cfg = &d->staged_config[t]; + if (!cfg->have_new_ctrl) + continue; + + err = resctrl_arch_update_one(r, d, closid, t, + cfg->new_ctrl); + if (err) + return err; + } + } + + return err; +} + +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) +{ + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + if (!mpam_is_enabled()) + return; + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + mpam_reset_class_locked(res->class); +} + +/** + * mpam_resctrl_domain_hdr_init() - Bring a subset of a domain online. + * @onlined_cpus: The set of CPUs that are online from the domain's + * perspective. + * @comp: The mpam component being brought online. + * @hdr: The header representing the domain. + * + * Adds @onlined_cpus to @hdr's cpu_mask, and sets the @hdr id. + * For NUMA nodes, @onlined_cpus will be cpu_possible_mask. + */ +static void mpam_resctrl_domain_hdr_init(const struct cpumask *onlined_cpus, + struct mpam_component *comp, + struct rdt_domain_hdr *hdr) +{ + int cpu = cpumask_any(onlined_cpus); + + lockdep_assert_cpus_held(); + + INIT_LIST_HEAD(&hdr->list); + hdr->id = mpam_resctrl_pick_domain_id(cpu, comp); + cpumask_and(&hdr->cpu_mask, &hdr->cpu_mask, onlined_cpus); +} + +/** + * mpam_resctrl_offline_domain_hdr() - Take a subset of a domain offline. + * @offlined_cpus: The set of CPUs that are offline from the domain's + * perspective. + * @hdr: The domain's header. + * + * Removes @offlined_cpus from @hdr's cpu_mask. If the list is empty, + * the domain header is removed from its parent list and true is returned, + * indicating the parent structure can be freed. + * If there are other CPUs in the domain, returns false. + * + * For NUMA nodes, @offlined_cpus will be cpu_possible_mask. 
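+ *
+ * For example, mpam_resctrl_offline_cpu() frees the containing
+ * struct mpam_resctrl_dom only once both headers report empty:
+ *
+ *	if (mpam_resctrl_offline_domain_hdr(cpumask_of(cpu), &ctrl_d->hdr))
+ *		resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d);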
+ */ +static bool mpam_resctrl_offline_domain_hdr(const struct cpumask *offlined_cpus, + struct rdt_domain_hdr *hdr) +{ + cpumask_andnot(&hdr->cpu_mask, &hdr->cpu_mask, offlined_cpus); + if (cpumask_empty(&hdr->cpu_mask)) { + list_del(&hdr->list); + return true; + } + + return false; +} + +static struct mpam_component *find_component(struct mpam_class *victim, + const struct cpumask *onlined_cpus) +{ + struct mpam_component *victim_comp; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(victim_comp, &victim->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + struct cpumask tmp; + + cpumask_andnot(&tmp, onlined_cpus, &victim_comp->affinity); + if (cpumask_empty(&tmp)) + return victim_comp; + } + + return NULL; +} + +static void mpam_resctrl_domain_insert(struct list_head *list, + struct rdt_domain_hdr *new) +{ + struct rdt_domain_hdr *err; + struct list_head *pos = NULL; + + lockdep_assert_held(&domain_list_lock); + + err = resctrl_find_domain(list, new->id, &pos); + if (WARN_ON_ONCE(err)) + return; + + list_add_tail_rcu(&new->list, pos); +} + +static struct mpam_resctrl_dom * +mpam_resctrl_alloc_domain(const struct cpumask *onlined_cpus, int nid, + struct mpam_component *ctrl_comp, + struct mpam_resctrl_res *res) +{ + int err; + struct mpam_resctrl_dom *dom; + struct rdt_mon_domain *mon_d; + struct rdt_ctrl_domain *ctrl_d; + struct rdt_resource *r = &res->resctrl_res; + + lockdep_assert_held(&domain_list_lock); + + dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, nid); + if (!dom) + return ERR_PTR(-ENOMEM); + + if (exposed_alloc_capable) { + dom->ctrl_comp = ctrl_comp; + + ctrl_d = &dom->resctrl_ctrl_dom; + mpam_resctrl_domain_hdr_init(onlined_cpus, ctrl_comp, &ctrl_d->hdr); + ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; + mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); + err = resctrl_online_ctrl_domain(r, ctrl_d); + if (err) { + dom = ERR_PTR(err); + goto offline_ctrl_domain; + } + } else { + pr_debug("Skipped control domain online - no controls\n"); + } + + if (exposed_mon_capable) { + int i; + struct mpam_component *mon_comp, *any_mon_comp; + + /* + * Even if the monitor domain is backed by a different component, + * the L3 component IDs need to be used... only there may be no + * ctrl_comp for the L3. + * Search each event's class list for a component with overlapping + * CPUs and set up the dom->mon_comp array. + */ + for (i = 0; i < QOS_NUM_EVENTS; i++) { + struct mpam_resctrl_mon *mon; + + mon = &mpam_resctrl_counters[i]; + if (!mon->class) + continue; // dummy resource + + mon_comp = find_component(mon->class, onlined_cpus); + dom->mon_comp[i] = mon_comp; + if (mon_comp) + any_mon_comp = mon_comp; + } + WARN_ON_ONCE(!any_mon_comp); + + dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; + + mon_d = &dom->resctrl_mon_dom; + mpam_resctrl_domain_hdr_init(onlined_cpus, any_mon_comp, + &mon_d->hdr); + mon_d->hdr.type = RESCTRL_MON_DOMAIN; + mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr); + err = resctrl_online_mon_domain(r, mon_d); + if (err) { + dom = ERR_PTR(err); + goto offline_mon_hdr; + } + } else { + pr_debug("Skipped monitor domain online - no monitors\n"); + } + goto out; + +offline_mon_hdr: + mpam_resctrl_offline_domain_hdr(onlined_cpus, &ctrl_d->hdr); + +offline_ctrl_domain: + resctrl_offline_ctrl_domain(r, ctrl_d); +out: + return dom; +} + +/* + * We know all the monitors are associated with the L3, even if there are no + * controls and therefore no control component. 
Find the cache-id for the CPU + * and use that to search for existing resctrl domains. + * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id + * for anything that is not a cache. + */ +static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) +{ + u32 cache_id; + struct rdt_mon_domain *mon_d; + struct mpam_resctrl_dom *dom; + struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + + if (!l3->class) + return NULL; + /* TODO: how does this order with cacheinfo updates under cpuhp? */ + cache_id = get_cpu_cacheinfo_id(cpu, 3); + if (cache_id == ~0) + return NULL; + + list_for_each_entry(mon_d, &l3->resctrl_res.mon_domains, hdr.list) { + dom = container_of(mon_d, struct mpam_resctrl_dom, resctrl_mon_dom); + + if (mon_d->hdr.id == cache_id) + return dom; + } + + return NULL; +} + +/** + * mpam_resctrl_get_domain_from_cpu() - find the mpam domain structure + * @cpu: The CPU that is going online/offline. + * @res: The resctrl resource the domain should belong to. + * + * The component structures must be used to identify the CPU may be marked + * offline in the resctrl structures. However the resctrl domain list is + * used to search as this is also used to determine if resctrl thinks the + * domain is online. + * For platforms with controls, this is easy as each resource has one control + * component. + * For the monitors, we need to search the list of events... + */ +static struct mpam_resctrl_dom * +mpam_resctrl_alloc_domain_cpu(int cpu, struct mpam_resctrl_res *res) +{ + struct mpam_component *comp_iter, *ctrl_comp; + struct mpam_class *class = res->class; + int idx; + + ctrl_comp = NULL; + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_srcu(comp_iter, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { + ctrl_comp = comp_iter; + break; + } + } + srcu_read_unlock(&mpam_srcu, idx); + + /* cpu with unknown exported component? */ + if (WARN_ON_ONCE(!ctrl_comp)) + return ERR_PTR(-EINVAL); + + return mpam_resctrl_alloc_domain(cpumask_of(cpu), cpu_to_node(cpu), + ctrl_comp, res); +} + +static struct mpam_resctrl_dom * +mpam_resctrl_alloc_domain_nid(int nid, struct mpam_resctrl_res *res) +{ + struct mpam_component *comp_iter, *ctrl_comp; + struct mpam_class *class = res->class; + int idx; + + /* Only the memory class uses comp_id as nid */ + if (class->type != MPAM_CLASS_MEMORY) + return ERR_PTR(-EINVAL); + + ctrl_comp = NULL; + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_srcu(comp_iter, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (comp_iter->comp_id == nid) { + ctrl_comp = comp_iter; + break; + } + } + srcu_read_unlock(&mpam_srcu, idx); + + /* cpu with unknown exported component? */ + if (WARN_ON_ONCE(!ctrl_comp)) + return ERR_PTR(-EINVAL); + + return mpam_resctrl_alloc_domain(cpu_possible_mask, nid, ctrl_comp, res); +} + +static struct mpam_resctrl_dom * +mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) +{ + struct mpam_resctrl_dom *dom; + struct rdt_ctrl_domain *ctrl_d; + struct rdt_resource *r = &res->resctrl_res; + + lockdep_assert_cpus_held(); + + list_for_each_entry(ctrl_d, &r->ctrl_domains, hdr.list) { + dom = container_of(ctrl_d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + + if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) + return dom; + } + + if (r->rid != RDT_RESOURCE_L3) + return NULL; + + /* Search the mon domain list too - needed on monitor only platforms. 
*/ + return mpam_resctrl_get_mon_domain_from_cpu(cpu); +} + +static struct mpam_resctrl_dom * +mpam_get_domain_from_nid(int nid, struct mpam_resctrl_res *res) +{ + struct rdt_ctrl_domain *d; + struct mpam_resctrl_dom *dom; + + list_for_each_entry(d, &res->resctrl_res.ctrl_domains, hdr.list) { + dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + + /* Only the memory class uses comp_id as nid */ + if (dom->ctrl_comp->class->type != MPAM_CLASS_MEMORY) + continue; + + if (dom->ctrl_comp->comp_id == nid) + return dom; + } + + return NULL; +} + +int mpam_resctrl_online_cpu(unsigned int cpu) +{ + int i, err = 0; + struct mpam_resctrl_dom *dom; + struct mpam_resctrl_res *res; + + mutex_lock(&domain_list_lock); + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + res = &mpam_resctrl_controls[i]; + if (!res->class) + continue; // dummy_resource; + + dom = mpam_resctrl_get_domain_from_cpu(cpu, res); + if (!dom) + dom = mpam_resctrl_alloc_domain_cpu(cpu, res); + if (IS_ERR(dom)) { + err = PTR_ERR(dom); + break; + } + + cpumask_set_cpu(cpu, &dom->resctrl_ctrl_dom.hdr.cpu_mask); + cpumask_set_cpu(cpu, &dom->resctrl_mon_dom.hdr.cpu_mask); + } + mutex_unlock(&domain_list_lock); + + if (!err) + resctrl_online_cpu(cpu); + + return err; +} + +int mpam_resctrl_offline_cpu(unsigned int cpu) +{ + int i; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + struct rdt_mon_domain *mon_d; + struct rdt_ctrl_domain *ctrl_d; + bool ctrl_dom_empty, mon_dom_empty; + + resctrl_offline_cpu(cpu); + + mutex_lock(&domain_list_lock); + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + res = &mpam_resctrl_controls[i]; + if (!res->class) + continue; // dummy resource + + dom = mpam_resctrl_get_domain_from_cpu(cpu, res); + if (WARN_ON_ONCE(!dom)) + continue; + + ctrl_dom_empty = true; + if (exposed_alloc_capable) { + mpam_reset_component_locked(dom->ctrl_comp); + + ctrl_d = &dom->resctrl_ctrl_dom; + ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpumask_of(cpu), + &ctrl_d->hdr); + if (ctrl_dom_empty) + resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); + } + + mon_dom_empty = true; + if (exposed_mon_capable) { + mon_d = &dom->resctrl_mon_dom; + mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpumask_of(cpu), + &mon_d->hdr); + if (mon_dom_empty) + resctrl_offline_mon_domain(&res->resctrl_res, mon_d); + } + + if (ctrl_dom_empty && mon_dom_empty) + kfree(dom); + } + mutex_unlock(&domain_list_lock); + + return 0; +} + +static int mpam_resctrl_online_node(unsigned int nid) +{ + struct mpam_resctrl_dom *dom; + struct mpam_resctrl_res *res; + + /* Domain IDs as NUMA nid is only defined for MBA */ + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + if (!res->class) + return 0; // dummy_resource; + + dom = mpam_get_domain_from_nid(nid, res); + if (!dom) + dom = mpam_resctrl_alloc_domain_nid(nid, res); + if (IS_ERR(dom)) + return PTR_ERR(dom); + + return 0; +} + +static int mpam_resctrl_offline_node(unsigned int nid) +{ + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + struct rdt_mon_domain *mon_d; + struct rdt_ctrl_domain *ctrl_d; + + /* Domain IDs as NUMA nid is only defined for MBA */ + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + if (!res->class) + return 0; // dummy_resource; + + dom = mpam_get_domain_from_nid(nid, res); + if (WARN_ON_ONCE(!dom)) + return 0; + + ctrl_d = &dom->resctrl_ctrl_dom; + resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); + if (!mpam_resctrl_offline_domain_hdr(cpu_possible_mask, &ctrl_d->hdr)) + return 0; + + // TODO: skip monitor domains if there are no 
monitors for this resource
+	mon_d = &dom->resctrl_mon_dom;
+	resctrl_offline_mon_domain(&res->resctrl_res, mon_d);
+	if (!mpam_resctrl_offline_domain_hdr(cpu_possible_mask, &mon_d->hdr))
+		return 0;
+
+	kfree(dom);
+
+	return 0;
+}
+
+static int mpam_resctrl_node_notifier(struct notifier_block *self,
+				      unsigned long action, void *arg)
+{
+	struct node_notify *nn = arg;
+
+	if (nn->nid < 0 || !mb_uses_numa_nid)
+		return NOTIFY_OK;
+
+	/*
+	 * Ignore nids that have CPUs. Resctrl needs to see the cpu offline
+	 * call for each CPU to update the CPUs in control groups. Moving
+	 * the overflow handler isn't an issue as only L3 can be mon_capable,
+	 * and NUMA nids used as domain-ids are only an option for MBA.
+	 */
+	if (!cpumask_empty(cpumask_of_node(nn->nid)))
+		return NOTIFY_OK;
+
+	switch (action) {
+	case NODE_ADDED_FIRST_MEMORY:
+		mpam_resctrl_online_node(nn->nid);
+		break;
+	case NODE_REMOVED_LAST_MEMORY:
+		mpam_resctrl_offline_node(nn->nid);
+		break;
+	default:
+		/* don't care */
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+int mpam_resctrl_setup(void)
+{
+	int err = 0;
+	enum resctrl_event_id j;
+	enum resctrl_res_level i;
+	struct mpam_resctrl_res *res;
+	struct mpam_resctrl_mon *mon;
+
+	wait_event(wait_cacheinfo_ready, cacheinfo_ready);
+
+	cpus_read_lock();
+	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
+		res = &mpam_resctrl_controls[i];
+		INIT_LIST_HEAD(&res->resctrl_res.ctrl_domains);
+		INIT_LIST_HEAD(&res->resctrl_res.mon_domains);
+		res->resctrl_res.rid = i;
+	}
+
+	/* Find some classes to use for controls */
+	mpam_resctrl_pick_caches();
+	mpam_resctrl_pick_mba();
+
+	/* Initialise the resctrl structures from the classes */
+	for (i = 0; i < RDT_NUM_RESOURCES; i++) {
+		res = &mpam_resctrl_controls[i];
+		if (!res->class)
+			continue; // dummy resource
+
+		err = mpam_resctrl_control_init(res, i);
+		if (err) {
+			pr_debug("Failed to initialise rid %u\n", i);
+			break;
+		}
+	}
+
+	/* Find some classes to use for monitors */
+	mpam_resctrl_pick_counters();
+
+	for (j = 0; j < QOS_NUM_EVENTS; j++) {
+		mon = &mpam_resctrl_counters[j];
+		if (!mon->class)
+			continue; // dummy resource
+
+		mpam_resctrl_monitor_init(mon, j);
+	}
+
+	if (mb_numa_nid_possible) {
+		hotplug_node_notifier(mpam_resctrl_node_notifier,
+				      RESCTRL_CALLBACK_PRI);
+	}
+
+	cpus_read_unlock();
+
+	if (err || (!exposed_alloc_capable && !exposed_mon_capable)) {
+		if (err)
+			pr_debug("Internal error %d - resctrl not supported\n", err);
+		else
+			pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n",
+				 exposed_alloc_capable, exposed_mon_capable);
+		err = -EOPNOTSUPP;
+	}
+
+	if (!err) {
+		if (!is_power_of_2(mpam_pmg_max + 1)) {
+			/*
+			 * If not all the partid*pmg values are valid indexes,
+			 * resctrl may allocate pmg that don't exist. This
+			 * should cause an error interrupt.
+			 */
+			pr_warn("Number of PMG is not a power of 2! 
resctrl may misbehave"); + } + + err = resctrl_init(); + if (!err) + WRITE_ONCE(resctrl_enabled, true); + } + + return err; +} + +void mpam_resctrl_exit(void) +{ + if (!READ_ONCE(resctrl_enabled)) + return; + + WRITE_ONCE(resctrl_enabled, false); + resctrl_exit(); +} + +static void mpam_resctrl_teardown_mon(struct mpam_resctrl_mon *mon, struct mpam_class *class) +{ + u32 num_mbwu_mon = l3_num_allocated_mbwu; + + if (!mon->mbwu_idx_to_mon) + return; + + if (mon->assigned_counters) { + __free_mbwu_mon(class, mon->assigned_counters, num_mbwu_mon); + mon->assigned_counters = NULL; + kfree(mon->mbwu_idx_to_mon); + } else { + __free_mbwu_mon(class, mon->mbwu_idx_to_mon, num_mbwu_mon); + } + mon->mbwu_idx_to_mon = NULL; +} + +/* + * The driver is detaching an MSC from this class, if resctrl was using it, + * pull on resctrl_exit(). + */ +void mpam_resctrl_teardown_class(struct mpam_class *class) +{ + int i; + struct mpam_resctrl_res *res; + struct mpam_resctrl_mon *mon; + + might_sleep(); + + for (i = 0; i < RDT_NUM_RESOURCES; i++) { + res = &mpam_resctrl_controls[i]; + if (res->class == class) { + mpam_resctrl_exit(); + res->class = NULL; + break; + } + } + for (i = 0; i < QOS_NUM_EVENTS; i++) { + mon = &mpam_resctrl_counters[i]; + if (mon->class == class) { + mpam_resctrl_exit(); + mon->class = NULL; + + mpam_resctrl_teardown_mon(mon, class); + + break; + } + } +} + +static int __init __cacheinfo_ready(void) +{ + cacheinfo_ready = true; + wake_up(&wait_cacheinfo_ready); + + return 0; +} +device_initcall_sync(__cacheinfo_ready); + +#ifdef CONFIG_MPAM_KUNIT_TEST +#include "test_mpam_resctrl.c" +#endif diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c new file mode 100644 index 0000000000000..dea3a6fdfd195 --- /dev/null +++ b/drivers/resctrl/test_mpam_devices.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. +/* This file is intended to be included into mpam_devices.c */ + +#include + +/* + * This test catches fields that aren't being sanitised - but can't tell you + * which one... 
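+ * (the memcmp() checks below compare all the sanitised fields at once,
+ * so a field that __props_mismatch() missed fails the comparison without
+ * identifying which field it was).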
+ */ +static void test__props_mismatch(struct kunit *test) +{ + struct mpam_props parent = { 0 }; + struct mpam_props child; + size_t props_bytes = offsetof(struct mpam_props, num_mbwu_mon) + + sizeof(parent.num_mbwu_mon); + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, false); + + memset(&child, 0, sizeof(child)); + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, props_bytes), 0); + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, true); + + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, props_bytes), 0); +} + +static struct list_head fake_classes_list; +static struct mpam_class fake_class = { 0 }; +static struct mpam_component fake_comp1 = { 0 }; +static struct mpam_component fake_comp2 = { 0 }; +static struct mpam_vmsc fake_vmsc1 = { 0 }; +static struct mpam_vmsc fake_vmsc2 = { 0 }; +static struct mpam_msc fake_msc1 = { 0 }; +static struct mpam_msc fake_msc2 = { 0 }; +static struct mpam_msc_ris fake_ris1 = { 0 }; +static struct mpam_msc_ris fake_ris2 = { 0 }; +static struct platform_device fake_pdev = { 0 }; + +static inline void reset_fake_hierarchy(void) +{ + INIT_LIST_HEAD(&fake_classes_list); + + memset(&fake_class, 0, sizeof(fake_class)); + fake_class.level = 3; + fake_class.type = MPAM_CLASS_CACHE; + INIT_LIST_HEAD_RCU(&fake_class.components); + INIT_LIST_HEAD(&fake_class.classes_list); + + memset(&fake_comp1, 0, sizeof(fake_comp1)); + memset(&fake_comp2, 0, sizeof(fake_comp2)); + fake_comp1.comp_id = 1; + fake_comp2.comp_id = 2; + INIT_LIST_HEAD(&fake_comp1.vmsc); + INIT_LIST_HEAD(&fake_comp1.class_list); + INIT_LIST_HEAD(&fake_comp2.vmsc); + INIT_LIST_HEAD(&fake_comp2.class_list); + + memset(&fake_vmsc1, 0, sizeof(fake_vmsc1)); + memset(&fake_vmsc2, 0, sizeof(fake_vmsc2)); + INIT_LIST_HEAD(&fake_vmsc1.ris); + INIT_LIST_HEAD(&fake_vmsc1.comp_list); + fake_vmsc1.msc = &fake_msc1; + INIT_LIST_HEAD(&fake_vmsc2.ris); + INIT_LIST_HEAD(&fake_vmsc2.comp_list); + fake_vmsc2.msc = &fake_msc2; + + memset(&fake_ris1, 0, sizeof(fake_ris1)); + memset(&fake_ris2, 0, sizeof(fake_ris2)); + fake_ris1.ris_idx = 1; + INIT_LIST_HEAD(&fake_ris1.msc_list); + fake_ris2.ris_idx = 2; + INIT_LIST_HEAD(&fake_ris2.msc_list); + + fake_msc1.pdev = &fake_pdev; + fake_msc2.pdev = &fake_pdev; + + list_add(&fake_class.classes_list, &fake_classes_list); +} + +static void test_mpam_enable_merge_features(struct kunit *test) +{ + reset_fake_hierarchy(); + + mutex_lock(&mpam_list_lock); + + /* One Class+Comp, two RIS in one vMSC with common features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two RIS in one vMSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + 
list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* Multiple RIS within one MSC controlling the same resource can be mismatched */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_vmsc1.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + KUNIT_EXPECT_EQ(test, fake_vmsc1.props.cmax_wd, 4); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. 
+ */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with incompatible overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 5; + fake_ris2.props.cpbm_wd = 3; + fake_ris1.props.mbw_pbm_bits = 5; + fake_ris2.props.mbw_pbm_bits = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_mbw_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.mbw_pbm_bits, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features that need tweaking */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_mbw_min, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_min, &fake_ris2.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris2.props); + fake_ris1.props.bwa_wd = 5; + fake_ris2.props.bwa_wd = 3; + fake_ris1.props.cmax_wd = 5; + fake_ris2.props.cmax_wd = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * RIS with different control properties need to be sanitised so the + * class has the common set of properties. 
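+	 * Here the bwa_wd and cmax_wd values of 5 and 3 are expected to be
+	 * merged down to the smaller width, 3.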
+ */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmax, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.bwa_wd, 3); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 3); + + reset_fake_hierarchy(); + + /* One Class Two Comp with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class Two Comp with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple components can't control the same resource, mismatched features can + * not be supported. 
+ */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + mutex_unlock(&mpam_list_lock); +} + +static void test_mpam_extend_config(struct kunit *test) +{ + struct mpam_config fake_cfg = { 0 }; + struct mpam_class fake_class = { 0 }; + + /* Configurations with both are not modified */ + fake_class.props.bwa_wd = 16; + fake_cfg.mbw_max = 0xfeef; + fake_cfg.mbw_min = 0xfeef; + bitmap_zero(fake_cfg.features, MPAM_FEATURE_LAST); + mpam_set_feature(mpam_feat_mbw_max, &fake_cfg); + mpam_set_feature(mpam_feat_mbw_min, &fake_cfg); + mpam_extend_config(&fake_class, &fake_cfg); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_max, &fake_cfg)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_cfg)); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_max, 0xfeef); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_min, 0xfeef); + + /* When a min is missing, it is generated */ + fake_class.props.bwa_wd = 16; + fake_cfg.mbw_max = 0xfeef; + fake_cfg.mbw_min = 0; + bitmap_zero(fake_cfg.features, MPAM_FEATURE_LAST); + mpam_set_feature(mpam_feat_mbw_max, &fake_cfg); + mpam_extend_config(&fake_class, &fake_cfg); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_max, &fake_cfg)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_cfg)); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_max, 0xfeef); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_min, 0xf224); + + fake_class.props.bwa_wd = 8; + fake_cfg.mbw_max = 0xfeef; + fake_cfg.mbw_min = 0; + bitmap_zero(fake_cfg.features, MPAM_FEATURE_LAST); + mpam_set_feature(mpam_feat_mbw_max, &fake_cfg); + mpam_extend_config(&fake_class, &fake_cfg); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_max, &fake_cfg)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_cfg)); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_max, 0xfeef); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_min, 0xf224); + + /* 5% below the minimum granule, is still the minimum granule */ + fake_class.props.bwa_wd = 12; + fake_cfg.mbw_max = 0xf; + fake_cfg.mbw_min = 0; + bitmap_zero(fake_cfg.features, MPAM_FEATURE_LAST); + mpam_set_feature(mpam_feat_mbw_max, &fake_cfg); + mpam_extend_config(&fake_class, &fake_cfg); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_max, &fake_cfg)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_cfg)); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_max, 0xf); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_min, 0xf); + + fake_class.props.bwa_wd = 16; + fake_cfg.mbw_max = 0x4; + fake_cfg.mbw_min = 0; + bitmap_zero(fake_cfg.features, MPAM_FEATURE_LAST); + mpam_set_feature(mpam_feat_mbw_max, &fake_cfg); + mpam_extend_config(&fake_class, &fake_cfg); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_max, &fake_cfg)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_cfg)); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_max, 0x4); + KUNIT_EXPECT_EQ(test, fake_cfg.mbw_min, 0x0); +} + +static void test_mpam_reset_msc_bitmap(struct kunit *test) +{ + char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); + struct mpam_msc fake_msc = {}; + u32 *test_result; + + if (!buf) + return; + + fake_msc.mapped_hwpage = buf; + fake_msc.mapped_hwpage_sz = SZ_16K; + cpumask_copy(&fake_msc.accessibility, cpu_possible_mask); + + /* Satisfy lockdep checks */ + mutex_init(&fake_msc.part_sel_lock); + mutex_lock(&fake_msc.part_sel_lock); 
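+	/*
+	 * The buffer from kunit_kzalloc() starts out zeroed, so the words
+	 * read back at MPAMCFG_CPBM below contain only what
+	 * mpam_reset_msc_bitmap() wrote.
+	 */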
+ + test_result = (u32 *)(buf + MPAMCFG_CPBM); + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); + KUNIT_EXPECT_EQ(test, test_result[0], 0); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); + KUNIT_EXPECT_EQ(test, test_result[0], 1); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 1); + test_result[0] = 0; + test_result[1] = 0; + + mutex_unlock(&fake_msc.part_sel_lock); +} + +static struct kunit_case mpam_devices_test_cases[] = { + KUNIT_CASE(test_mpam_reset_msc_bitmap), + KUNIT_CASE(test_mpam_enable_merge_features), + KUNIT_CASE(test__props_mismatch), + KUNIT_CASE(test_mpam_extend_config), + {} +}; + +static struct kunit_suite mpam_devices_test_suite = { + .name = "mpam_devices_test_suite", + .test_cases = mpam_devices_test_cases, +}; + +kunit_test_suites(&mpam_devices_test_suite); diff --git a/drivers/resctrl/test_mpam_resctrl.c b/drivers/resctrl/test_mpam_resctrl.c new file mode 100644 index 0000000000000..53289255fc537 --- /dev/null +++ b/drivers/resctrl/test_mpam_resctrl.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. +/* This file is intended to be included into mpam_resctrl.c */ + +#include +#include +#include +#include +#include + +struct percent_value_case { + u8 pc; + u8 width; + u16 value; +}; + +/* + * Mysterious inscriptions taken from ARM DDI 0598D.b, + * "Arm Architecture Reference Manual Supplement - Memory System + * Resource Partitioning and Monitoring (MPAM), for A-profile + * architecture", Section 9.8, "About the fixed-point fractional + * format" (exact percentage entries only): + */ +static const struct percent_value_case percent_value_cases[] = { + /* Architectural cases: */ + { 1, 8, 1 }, { 1, 12, 0x27 }, { 1, 16, 0x28e }, + { 25, 8, 0x3f }, { 25, 12, 0x3ff }, { 25, 16, 0x3fff }, + { 35, 8, 0x58 }, { 35, 12, 0x598 }, { 35, 16, 0x5998 }, + { 45, 8, 0x72 }, { 45, 12, 0x732 }, { 45, 16, 0x7332 }, + { 50, 8, 0x7f }, { 50, 12, 0x7ff }, { 50, 16, 0x7fff }, + { 52, 8, 0x84 }, { 52, 12, 0x850 }, { 52, 16, 0x851d }, + { 55, 8, 0x8b }, { 55, 12, 0x8cb }, { 55, 16, 0x8ccb }, + { 58, 8, 0x93 }, { 58, 12, 0x946 }, { 58, 16, 0x9479 }, + { 75, 8, 0xbf }, { 75, 12, 0xbff }, { 75, 16, 0xbfff }, + { 88, 8, 0xe0 }, { 88, 12, 0xe13 }, { 88, 16, 0xe146 }, + { 95, 8, 0xf2 }, { 95, 12, 0xf32 }, { 95, 16, 0xf332 }, + { 100, 8, 0xff }, { 100, 12, 0xfff }, { 100, 16, 0xffff }, + +}; + +static void test_percent_value_desc(const struct percent_value_case *param, + char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, + "pc=%d, width=%d, value=0x%.*x\n", + param->pc, param->width, + DIV_ROUND_UP(param->width, 4), param->value); +} + +KUNIT_ARRAY_PARAM(test_percent_value, percent_value_cases, + test_percent_value_desc); + +struct percent_value_test_info { + u32 pc; /* result of value-to-percent conversion */ + u32 value; /* result of percent-to-value conversion */ + u32 max_value; /* 
maximum raw value allowed by test params */
+	unsigned int shift;	/* promotes raw testcase value to 16 bits */
+};
+
+/*
+ * Convert a reference percentage to a fixed-point MAX value and
+ * vice-versa, based on param (not test->param_value!)
+ */
+static void __prepare_percent_value_test(struct kunit *test,
+					 struct percent_value_test_info *res,
+					 const struct percent_value_case *param)
+{
+	struct mpam_props fake_props = { };
+
+	/* Reject bogus test parameters that would break the tests: */
+	KUNIT_ASSERT_GE(test, param->width, 1);
+	KUNIT_ASSERT_LE(test, param->width, 16);
+	KUNIT_ASSERT_LT(test, param->value, 1 << param->width);
+
+	mpam_set_feature(mpam_feat_mbw_max, &fake_props);
+	fake_props.bwa_wd = param->width;
+
+	res->shift = 16 - param->width;
+	res->max_value = GENMASK_U32(param->width - 1, 0);
+	res->value = percent_to_mbw_max(param->pc, &fake_props);
+	res->pc = mbw_max_to_percent(param->value << res->shift, &fake_props);
+}
+
+static void test_get_mba_granularity(struct kunit *test)
+{
+	int ret;
+	struct mpam_props fake_props = { };
+
+	/* Use MBW_PBM */
+	mpam_set_feature(mpam_feat_mbw_part, &fake_props);
+
+	/* 0 bits means the control is unconfigurable */
+	fake_props.mbw_pbm_bits = 0;
+	KUNIT_EXPECT_FALSE(test, mba_class_use_mbw_part(&fake_props));
+
+	/* Otherwise, bitmaps that fit in a u32 are supported: */
+	fake_props.mbw_pbm_bits = 1;
+	KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_part(&fake_props));
+
+	fake_props.mbw_pbm_bits = 32;
+	KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_part(&fake_props));
+
+	/* But bigger bitmaps aren't: */
+	fake_props.mbw_pbm_bits = 33;
+	KUNIT_EXPECT_FALSE(test, mba_class_use_mbw_part(&fake_props));
+
+	fake_props.mbw_pbm_bits = 4;
+	ret = get_mba_granularity(&fake_props);
+	KUNIT_EXPECT_EQ(test, ret, 25); /* DIV_ROUND_UP(100, 4)% = 25% */
+
+	fake_props.mbw_pbm_bits = 6;
+	ret = get_mba_granularity(&fake_props);
+	KUNIT_EXPECT_EQ(test, ret, 17); /* DIV_ROUND_UP(100, 6)% = 17% */
+
+	/* Largest bitmap size that the driver supports, for now: */
+	fake_props.mbw_pbm_bits = 32;
+	ret = get_mba_granularity(&fake_props);
+	KUNIT_EXPECT_EQ(test, ret, 4); /* DIV_ROUND_UP(100, 32)% = 4% */
+
+	/* Use MBW_MAX */
+	bitmap_zero(fake_props.features, MPAM_FEATURE_LAST);
+	fake_props.mbw_pbm_bits = 0;
+	mpam_set_feature(mpam_feat_mbw_max, &fake_props);
+
+	fake_props.bwa_wd = 0;
+	KUNIT_EXPECT_FALSE(test, mba_class_use_mbw_max(&fake_props));
+
+	fake_props.bwa_wd = 1;
+	KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_max(&fake_props));
+
+	/* Architectural maximum: */
+	fake_props.bwa_wd = 16;
+	KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_max(&fake_props));
+
+	/* No usable control... 
*/ + fake_props.bwa_wd = 0; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 0); + + fake_props.bwa_wd = 1; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 50); /* DIV_ROUND_UP(100, 1 << 1)% = 50% */ + + fake_props.bwa_wd = 2; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 25); /* DIV_ROUND_UP(100, 1 << 2)% = 25% */ + + fake_props.bwa_wd = 3; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 13); /* DIV_ROUND_UP(100, 1 << 3)% = 13% */ + + fake_props.bwa_wd = 6; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 2); /* DIV_ROUND_UP(100, 1 << 6)% = 2% */ + + fake_props.bwa_wd = 7; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 1); /* DIV_ROUND_UP(100, 1 << 7)% = 1% */ + + /* Granularity saturates at 1% */ + fake_props.bwa_wd = 16; /* architectural maximum */ + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 1); /* DIV_ROUND_UP(100, 1 << 16)% = 1% */ +} + +static void test_mbw_pbm_to_percent(struct kunit *test) +{ + int ret; + struct mpam_props fake_props = {0}; + + mpam_set_feature(mpam_feat_mbw_part, &fake_props); + fake_props.mbw_pbm_bits = 4; + + ret = mbw_pbm_to_percent(0x0, &fake_props); + KUNIT_EXPECT_EQ(test, ret, 0); + + ret = mbw_pbm_to_percent(0x3, &fake_props); + KUNIT_EXPECT_EQ(test, ret, 50); + + ret = mbw_pbm_to_percent(0x7, &fake_props); + KUNIT_EXPECT_EQ(test, ret, 75); + + fake_props.mbw_pbm_bits = 16; + ret = mbw_pbm_to_percent(0xffff, &fake_props); + KUNIT_EXPECT_EQ(test, ret, 100); + + fake_props.mbw_pbm_bits = 0; + ret = mbw_pbm_to_percent(0xff, &fake_props); + KUNIT_EXPECT_EQ(test, ret, 0); +} + +static void test_fract16_to_percent(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + struct percent_value_test_info res; + + /* + * Since the reference values in percent_value_cases[] all + * correspond to exact percentages, round-to-nearest will + * always give the exact percentage back when the MPAM max + * value has precision of 0.5% or finer. (Always true for the + * reference data, since they all specify 8 bits or more of + * precision. + * + * So, keep it simple and demand an exact match: + */ + __prepare_percent_value_test(test, &res, param); + KUNIT_EXPECT_EQ(test, res.pc, param->pc); +} + +static void test_percent_to_mbw_pbm(struct kunit *test) +{ + unsigned long ret; + struct mpam_props fake_props = {0}; + + mpam_set_feature(mpam_feat_mbw_part, &fake_props); + fake_props.mbw_pbm_bits = 4; + + ret = percent_to_mbw_pbm(100, &fake_props); + KUNIT_EXPECT_EQ(test, bitmap_weight(&ret, fake_props.mbw_pbm_bits), 4); + + ret = percent_to_mbw_pbm(50, &fake_props); + KUNIT_EXPECT_EQ(test, bitmap_weight(&ret, fake_props.mbw_pbm_bits), 2); + + ret = percent_to_mbw_pbm(0, &fake_props); + KUNIT_EXPECT_EQ(test, bitmap_weight(&ret, fake_props.mbw_pbm_bits), 0); + + fake_props.mbw_pbm_bits = 16; + ret = percent_to_mbw_pbm(100, &fake_props); + KUNIT_EXPECT_EQ(test, bitmap_weight(&ret, fake_props.mbw_pbm_bits), 16); +} + +static void test_percent_to_mbw_max(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + struct percent_value_test_info res; + + __prepare_percent_value_test(test, &res, param); + + KUNIT_EXPECT_GE(test, res.value, param->value << res.shift); + KUNIT_EXPECT_LE(test, res.value, (param->value + 1) << res.shift); + KUNIT_EXPECT_LE(test, res.value, res.max_value << res.shift); + + /* No flexibility allowed for 0% and 100%! 
*/ + + if (param->pc == 0) + KUNIT_EXPECT_EQ(test, res.value, 0); + + if (param->pc == 100) + KUNIT_EXPECT_EQ(test, res.value, res.max_value << res.shift); +} + +static const void *test_all_bwa_wd_gen_params(const void *prev, + char *desc) +{ + uintptr_t param = (uintptr_t)prev; + + if (param > 15) + return NULL; + + param++; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "wd=%u\n", (unsigned int)param); + + return (void *)param; +} + +static unsigned int test_get_bwa_wd(struct kunit *test) +{ + uintptr_t param = (uintptr_t)test->param_value; + + KUNIT_ASSERT_GE(test, param, 1); + KUNIT_ASSERT_LE(test, param, 16); + + return param; +} + +static void test_mbw_max_to_percent_limits(struct kunit *test) +{ + struct mpam_props fake_props = {0}; + u32 max_value; + + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + fake_props.bwa_wd = test_get_bwa_wd(test); + max_value = GENMASK(15, 16 - fake_props.bwa_wd); + + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(max_value, &fake_props), + MAX_MBA_BW); + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(0, &fake_props), + get_mba_min(&fake_props)); + + /* + * Rounding policy dependent 0% sanity-check: + * With round-to-nearest, the minimum mbw_max value really + * should map to 0% if there are at least 200 steps. + * (100 steps may be enough for some other rounding policies.) + */ + if (fake_props.bwa_wd >= 8) + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(0, &fake_props), 0); + + if (fake_props.bwa_wd < 8 && + mbw_max_to_percent(0, &fake_props) == 0) + kunit_warn(test, "wd=%d: Testsuite/driver Rounding policy mismatch?", + fake_props.bwa_wd); +} + +/* + * Check that converting a percentage to mbw_max and back again (or, as + * appropriate, vice-versa) always restores the original value: + */ +static void test_percent_max_roundtrip_stability(struct kunit *test) +{ + struct mpam_props fake_props = {0}; + unsigned int shift; + u32 pc, max, pc2, max2; + + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + fake_props.bwa_wd = test_get_bwa_wd(test); + shift = 16 - fake_props.bwa_wd; + + /* + * Converting a valid value from the coarser scale to the finer + * scale and back again must yield the original value: + */ + if (fake_props.bwa_wd >= 7) { + /* More than 100 steps: only test exact pc values: */ + for (pc = get_mba_min(&fake_props); pc <= MAX_MBA_BW; pc++) { + max = percent_to_mbw_max(pc, &fake_props); + pc2 = mbw_max_to_percent(max, &fake_props); + KUNIT_EXPECT_EQ(test, pc2, pc); + } + } else { + /* Fewer than 100 steps: only test exact mbw_max values: */ + for (max = 0; max < 1 << 16; max += 1 << shift) { + pc = mbw_max_to_percent(max, &fake_props); + max2 = percent_to_mbw_max(pc, &fake_props); + KUNIT_EXPECT_EQ(test, max2, max); + } + } +} + +static void test_percent_to_max_rounding(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + unsigned int num_rounded_up = 0, total = 0; + struct percent_value_test_info res; + + for (param = percent_value_cases, total = 0; + param < &percent_value_cases[ARRAY_SIZE(percent_value_cases)]; + param++, total++) { + __prepare_percent_value_test(test, &res, param); + if (res.value > param->value << res.shift) + num_rounded_up++; + } + + /* + * The MPAM driver applies a round-to-nearest policy, whereas a + * round-down policy seems to have been applied in the + * reference table from which the test vectors were selected. + * + * For a large and well-distributed suite of test vectors, + * about half should be rounded up and half down compared with + * the reference table. 
The actual test vectors are few in + * number and probably not very well distributed however, so + * tolerate a round-up rate of between 1/4 and 3/4 before + * crying foul: + */ + + kunit_info(test, "Round-up rate: %u%% (%u/%u)\n", + DIV_ROUND_CLOSEST(num_rounded_up * 100, total), + num_rounded_up, total); + + KUNIT_EXPECT_GE(test, 4 * num_rounded_up, 1 * total); + KUNIT_EXPECT_LE(test, 4 * num_rounded_up, 3 * total); +} + +static void test_num_assignable_counters(struct kunit *test) +{ + unsigned int orig_l3_num_allocated_mbwu = l3_num_allocated_mbwu; + u32 orig_mpam_partid_max = mpam_partid_max; + u32 orig_mpam_pmg_max = mpam_pmg_max; + bool orig_cdp_enabled = cdp_enabled; + struct rdt_resource fake_l3; + + /* Force there to be some PARTID/PMG */ + mpam_partid_max = 3; + mpam_pmg_max = 1; + + cdp_enabled = false; + + /* ABMC off, CDP off */ + l3_num_allocated_mbwu = resctrl_arch_system_num_rmid_idx(); + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, resctrl_arch_system_num_rmid_idx()); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC on, CDP off */ + l3_num_allocated_mbwu = 4; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 4); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_assign_on_mkdir); + + cdp_enabled = true; + + /* ABMC off, CDP on */ + l3_num_allocated_mbwu = resctrl_arch_system_num_rmid_idx(); + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + + /* (value not consumed by resctrl) */ + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, resctrl_arch_system_num_rmid_idx() / 2); + + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC on, CDP on */ + l3_num_allocated_mbwu = 4; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 2); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_TRUE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* ABMC 'on', CDP on - but not enough counters */ + l3_num_allocated_mbwu = 1; + mpam_resctrl_monitor_sync_abmc_vals(&fake_l3); + KUNIT_EXPECT_EQ(test, fake_l3.mon.num_mbm_cntrs, 0); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_cntr_assignable); + KUNIT_EXPECT_FALSE(test, fake_l3.mon.mbm_assign_on_mkdir); + + /* Restore global variables that were messed with */ + l3_num_allocated_mbwu = orig_l3_num_allocated_mbwu; + mpam_partid_max = orig_mpam_partid_max; + mpam_pmg_max = orig_mpam_pmg_max; + cdp_enabled = orig_cdp_enabled; +} + +static struct kunit_case mpam_resctrl_test_cases[] = { + KUNIT_CASE(test_get_mba_granularity), + KUNIT_CASE(test_mbw_pbm_to_percent), + KUNIT_CASE_PARAM(test_fract16_to_percent, test_percent_value_gen_params), + KUNIT_CASE(test_percent_to_mbw_pbm), + KUNIT_CASE_PARAM(test_percent_to_mbw_max, test_percent_value_gen_params), + KUNIT_CASE_PARAM(test_mbw_max_to_percent_limits, test_all_bwa_wd_gen_params), + KUNIT_CASE(test_percent_to_max_rounding), + KUNIT_CASE_PARAM(test_percent_max_roundtrip_stability, + test_all_bwa_wd_gen_params), + KUNIT_CASE(test_num_assignable_counters), + {} +}; + +static struct kunit_suite mpam_resctrl_test_suite = { + .name = "mpam_resctrl_test_suite", + .test_cases = mpam_resctrl_test_cases, +}; + +kunit_test_suites(&mpam_resctrl_test_suite); diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c 
b/drivers/soc/tegra/fuse/tegra-apbmisc.c index 0ce94fdc536fb..7ef1782534d5e 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -27,6 +28,11 @@ #define PMC_STRAPPING_OPT_A_RAM_CODE_MASK_SHORT \ (0x3 << PMC_STRAPPING_OPT_A_RAM_CODE_SHIFT) +#define TEGRA_SMCCC_PLATFORM(x) ((x >> 8) & 0xff) +#define TEGRA_SMCCC_CHIP_ID(x) ((x >> 4) & 0xff) +#define TEGRA_SMCCC_MAJOR_REV(x) (x & 0xf) +#define TEGRA_SMCCC_MINOR_REV(x) (x & 0xf) + static void __iomem *apbmisc_base; static bool long_ram_code; static u32 strapping; @@ -41,21 +47,46 @@ u32 tegra_read_chipid(void) u8 tegra_get_chip_id(void) { +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY + s32 soc_id = arm_smccc_get_soc_id_version(); + + if (soc_id >= 0) + return TEGRA_SMCCC_CHIP_ID(soc_id); +#endif return (tegra_read_chipid() >> 8) & 0xff; } u8 tegra_get_major_rev(void) { +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY + s32 soc_id = arm_smccc_get_soc_id_version(); + + if (soc_id >= 0) + return TEGRA_SMCCC_MAJOR_REV(soc_id); +#endif return (tegra_read_chipid() >> 4) & 0xf; } u8 tegra_get_minor_rev(void) { +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY + s32 revision = arm_smccc_get_soc_id_revision(); + + if (revision >= 0) + return TEGRA_SMCCC_MINOR_REV(revision); +#endif return (tegra_read_chipid() >> 16) & 0xf; + } u8 tegra_get_platform(void) { +#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY + s32 revision = arm_smccc_get_soc_id_revision(); + + if (revision >= 0) + return TEGRA_SMCCC_PLATFORM(revision); +#endif return (tegra_read_chipid() >> 20) & 0xf; } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index f985d779e5e69..c4a6544aa1075 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1510,8 +1510,6 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, ep_ctx->tx_info = cpu_to_le32(EP_MAX_ESIT_PAYLOAD_LO(max_esit_payload) | EP_AVG_TRB_LENGTH(avg_trb_len)); - ep_ctx->reserved[0] = cpu_to_le32(0x1 | (0x1 << 11)); //mtk's bpks & bm - pr_err("%s rsv %#x\n", __func__, ep_ctx->reserved[0]); return 0; } @@ -1618,12 +1616,10 @@ void xhci_endpoint_copy(struct xhci_hcd *xhci, in_ep_ctx->ep_info2 = out_ep_ctx->ep_info2; in_ep_ctx->deq = out_ep_ctx->deq; in_ep_ctx->tx_info = out_ep_ctx->tx_info; -#if 0 if (xhci->quirks & XHCI_MTK_HOST) { in_ep_ctx->reserved[0] = out_ep_ctx->reserved[0]; in_ep_ctx->reserved[1] = out_ep_ctx->reserved[1]; } -#endif } /* Copy output xhci_slot_ctx to the input xhci_slot_ctx. 
diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c index 6cdcec03d03f6..25b0ecc76c098 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm.c +++ b/drivers/vfio/pci/nvgrace-gpu/egm.c @@ -33,6 +33,7 @@ struct egm_region { DECLARE_HASHTABLE(htbl, 0x10); #ifdef CONFIG_MEMORY_FAILURE struct pfn_address_space pfn_address_space; + bool pfn_space_registered; #endif }; @@ -140,7 +141,10 @@ static int nvgrace_egm_release(struct inode *inode, struct file *file) if (atomic_dec_and_test(&region->open_count)) { #ifdef CONFIG_MEMORY_FAILURE - unregister_pfn_address_space(&region->pfn_address_space); + if (region->pfn_space_registered) { + unregister_pfn_address_space(&region->pfn_address_space); + region->pfn_space_registered = false; + } #endif file->private_data = NULL; } @@ -169,7 +173,10 @@ static int nvgrace_egm_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_ops = &nvgrace_egm_mmap_ops; ret = nvgrace_egm_register_pfn_range(region, vma); + if (ret == 0) + region->pfn_space_registered = true; #endif + return ret; } @@ -458,6 +465,9 @@ int register_egm_node(struct pci_dev *pdev) region->egmpxm = egmpxm; hash_init(region->htbl); +#ifdef CONFIG_MEMORY_FAILURE + region->pfn_space_registered = false; +#endif INIT_LIST_HEAD(&region->gpus); atomic_set(&region->open_count, 0); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 3ec3324c20603..849688b3fce0a 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1140,6 +1140,70 @@ void debugfs_create_str(const char *name, umode_t mode, &fops_str_ro, &fops_str_wo); } +static ssize_t debugfs_read_file_cpumask(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = F_DENTRY(file); + struct cpumask *cpumask; + char *kernel_buf; + ssize_t ret; + int len; + + ret = debugfs_file_get(dentry); + if (unlikely(ret)) + return ret; + + /* How long is a piece of string? */ + kernel_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!kernel_buf) { + debugfs_file_put(dentry); + return -ENOMEM; + } + + cpumask = (struct cpumask *)file->private_data; + len = scnprintf(kernel_buf, PAGE_SIZE, + "%*pb\n", cpumask_pr_args(cpumask)); + debugfs_file_put(dentry); + if (len + 1 >= PAGE_SIZE) { + kfree(kernel_buf); + return -EIO; + } + + ret = simple_read_from_buffer(user_buf, count, ppos, kernel_buf, len); + kfree(kernel_buf); + + return ret; +} + +static const struct file_operations fops_cpumask_ro = { + .read = debugfs_read_file_cpumask, + .open = simple_open, + .llseek = default_llseek, +}; + +/** + * debugfs_create_cpumask - create a read-only debugfs file that is used to read a cpumask + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read from. + * + * This function creates a file in debugfs with the given name that + * contains the value of the variable @value.
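As a usage illustration for the helper defined just below: a minimal caller sketch only, not part of this patch; the directory name, cpumask variable and init function are invented for the example.

	#include <linux/cpumask.h>
	#include <linux/debugfs.h>
	#include <linux/init.h>

	/* Expose a module-owned cpumask read-only under debugfs. */
	static struct cpumask example_active_cpus;

	static int __init example_debugfs_init(void)
	{
		struct dentry *dir = debugfs_create_dir("example", NULL);

		cpumask_copy(&example_active_cpus, cpu_online_mask);
		/* 0444: read-only; a writable mode would trip the WARN_ON_ONCE() below. */
		debugfs_create_cpumask("active_cpus", 0444, dir, &example_active_cpus);
		return 0;
	}

Reading the resulting file prints the mask via the "%*pb" format used by debugfs_read_file_cpumask() above.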
+ */ +void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value) +{ + /* Only read-only is supported */ + WARN_ON_ONCE(mode & S_IWUGO); + + debugfs_create_mode_unsafe(name, mode, parent, value, &fops_cpumask_ro, + &fops_cpumask_ro, &fops_cpumask_ro); +} + static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig index 21671301bd8a4..145d837c190a3 100644 --- a/fs/resctrl/Kconfig +++ b/fs/resctrl/Kconfig @@ -37,3 +37,9 @@ config RESCTRL_RMID_DEPENDS_ON_CLOSID Enabled by the architecture when the RMID values depend on the CLOSID. This causes the CLOSID allocator to search for CLOSID with clean RMID. + +config RESCTRL_IOMMU + bool + help + Enabled by the architecture when some IOMMU are able to be configured + with CLOSID/RMID. diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 3c39cfacb2518..c3688cbe0ff5c 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -38,19 +38,11 @@ typedef int (ctrlval_parser_t)(struct rdt_parse_data *data, * hardware. The allocated bandwidth percentage is rounded to the next * control step available on the hardware. */ -static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) +static bool bw_validate(char *buf, u32 *data, struct resctrl_schema *s) { int ret; u32 bw; - /* - * Only linear delay values is supported for current Intel SKUs. - */ - if (!r->membw.delay_linear && r->membw.arch_needs_linear) { - rdt_last_cmd_puts("No support for non-linear MB domains\n"); - return false; - } - ret = kstrtou32(buf, 10, &bw); if (ret) { rdt_last_cmd_printf("Invalid MB value %s\n", buf); @@ -58,18 +50,18 @@ static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) } /* Nothing else to do if software controller is enabled. */ - if (is_mba_sc(r)) { + if (is_mba_sc(s->res)) { *data = bw; return true; } - if (bw < r->membw.min_bw || bw > r->membw.max_bw) { + if (bw < s->membw.min_bw || bw > s->membw.max_bw) { rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", - bw, r->membw.min_bw, r->membw.max_bw); + bw, s->membw.min_bw, s->membw.max_bw); return false; } - *data = roundup(bw, (unsigned long)r->membw.bw_gran); + *data = resctrl_arch_round_bw(bw, s); return true; } @@ -81,13 +73,7 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_resource *r = s->res; u32 bw_val; - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); - return -EINVAL; - } - - if (!bw_validate(data->buf, &bw_val, r)) + if (!bw_validate(data->buf, &bw_val, s)) return -EINVAL; if (is_mba_sc(r)) { @@ -95,6 +81,7 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, return 0; } + cfg = &d->staged_config[s->conf_type]; cfg->new_ctrl = bw_val; cfg->have_new_ctrl = true; @@ -161,12 +148,6 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_resource *r = s->res; u32 cbm_val; - cfg = &d->staged_config[s->conf_type]; - if (cfg->have_new_ctrl) { - rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); - return -EINVAL; - } - /* * Cannot set up more than one pseudo-locked region in a cache * hierarchy. 
@@ -204,6 +185,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, } } + cfg = &d->staged_config[s->conf_type]; cfg->new_ctrl = cbm_val; cfg->have_new_ctrl = true; @@ -231,11 +213,13 @@ static int parse_line(char *line, struct resctrl_schema *s, /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - switch (r->schema_fmt) { + switch (s->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: parse_ctrlval = &parse_cbm; break; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: parse_ctrlval = &parse_bw; break; } @@ -249,6 +233,15 @@ static int parse_line(char *line, struct resctrl_schema *s, return -EINVAL; } + /* + * Only linear delay values is supported for current Intel SKUs. + */ + if (r->rid == RDT_RESOURCE_MBA && + !r->mba.delay_linear && r->mba.arch_needs_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); + return -EINVAL; + } + next: if (!line || line[0] == '\0') return 0; @@ -261,12 +254,17 @@ static int parse_line(char *line, struct resctrl_schema *s, dom = strim(dom); list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (d->hdr.id == dom_id) { + cfg = &d->staged_config[t]; + if (cfg->have_new_ctrl) { + rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id); + return -EINVAL; + } + data.buf = dom; data.rdtgrp = rdtgrp; if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - cfg = &d->staged_config[t]; /* * In pseudo-locking setup mode and just * parsed a valid CBM that should be @@ -473,12 +471,12 @@ ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!strcmp(buf, "mbm_local_bytes")) { - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) rdtgrp->mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; else ret = -EINVAL; } else if (!strcmp(buf, "mbm_total_bytes")) { - if (resctrl_arch_is_mbm_total_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) rdtgrp->mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; else ret = -EINVAL; @@ -563,10 +561,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, rr->r = r; rr->d = d; rr->first = first; - rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); - if (IS_ERR(rr->arch_mon_ctx)) { - rr->err = -EINVAL; - return; + if (resctrl_arch_mbm_cntr_assign_enabled(r) && + resctrl_is_mbm_event(evtid)) { + rr->is_mbm_cntr = true; + } else { + rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); + if (IS_ERR(rr->arch_mon_ctx)) { + rr->err = -EINVAL; + return; + } } cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); @@ -582,7 +585,8 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); - resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); + if (rr->arch_mon_ctx) + resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); } int rdtgroup_mondata_show(struct seq_file *m, void *arg) @@ -653,10 +657,16 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) checkresult: + /* + * -ENOENT is a special case, set only when "mbm_event" counter assignment + * mode is enabled and no counter has been assigned. 
+ */ if (rr.err == -EIO) seq_puts(m, "Error\n"); else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); + else if (rr.err == -ENOENT) + seq_puts(m, "Unassigned\n"); else seq_printf(m, "%llu\n", rr.val); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151d..f5f74342af317 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -42,6 +42,8 @@ struct rdt_fs_context { bool enable_cdpl3; bool enable_mba_mbps; bool enable_debug; + bool mb_uses_numa_nid; + bool enable_abi_playground; }; static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) @@ -52,19 +54,31 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } /** - * struct mon_evt - Entry in the event list of a resource + * struct mon_evt - Properties of a monitor event * @evtid: event id + * @rid: resource id for this event * @name: name of the event + * @evt_cfg: Event configuration value that represents the + * memory transactions (e.g., READS_TO_LOCAL_MEM, + * READS_TO_REMOTE_MEM) being tracked by @evtid. + * Only valid if @evtid is an MBM event. * @configurable: true if the event is configurable - * @list: entry in &rdt_resource->evt_list + * @enabled: true if the event is enabled */ struct mon_evt { enum resctrl_event_id evtid; + enum resctrl_res_level rid; char *name; + u32 evt_cfg; bool configurable; - struct list_head list; + bool enabled; }; +extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; + +#define for_each_mon_event(mevt) for (mevt = &mon_event_all[QOS_FIRST_EVENT]; \ + mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) + /** * struct mon_data - Monitoring details for each event file. * @list: Member of the global @mon_data_kn_priv_list list. @@ -99,6 +113,8 @@ struct mon_data { * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @is_mbm_cntr: true if "mbm_event" counter assignment mode is enabled and it + * is an MBM event. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -113,6 +129,7 @@ struct rmid_read { enum resctrl_event_id evtid; bool first; struct cacheinfo *ci; + bool is_mbm_cntr; int err; u64 val; void *arch_mon_ctx; @@ -220,12 +237,19 @@ struct rdtgroup { #define RFTYPE_TOP BIT(6) +/* files that are specific to a type of resource, e.g. throttle_mode */ #define RFTYPE_RES_CACHE BIT(8) - #define RFTYPE_RES_MB BIT(9) #define RFTYPE_DEBUG BIT(10) +#define RFTYPE_ASSIGN_CONFIG BIT(11) + +/* files that are specific to a type of control, e.g. 
percent_min */ +#define RFTYPE_SCHEMA_BITMAP BIT(11) +#define RFTYPE_SCHEMA_PERCENT BIT(12) +#define RFTYPE_SCHEMA_MBPS BIT(13) + #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) @@ -279,6 +303,8 @@ struct mbm_state { u32 prev_bw; }; +DECLARE_STATIC_KEY_FALSE(resctrl_abi_playground); + extern struct mutex rdtgroup_mutex; static inline const char *rdt_kn_name(const struct kernfs_node *kn) @@ -375,6 +401,41 @@ bool closid_allocated(unsigned int closid); int resctrl_find_cleanest_closid(void); +void *rdt_kn_parent_priv(struct kernfs_node *kn); + +int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + +ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); + +void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, + bool show); + +int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + +int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, + void *v); + +void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp); + +void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp); + +int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); + +ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + +int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, + struct seq_file *s, void *v); + +ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); + +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 7326c28a7908f..e62432467817f 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) "resctrl: " fmt #include +#include #include #include #include @@ -98,12 +99,17 @@ unsigned int resctrl_rmid_realloc_limit; * * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code * must accept an attempt to read every index. + * + * Returns NULL if the rmid_ptrs[] array is not allocated. */ static inline struct rmid_entry *__rmid_entry(u32 idx) { struct rmid_entry *entry; u32 closid, rmid; + if (!rmid_ptrs) + return NULL; + entry = &rmid_ptrs[idx]; resctrl_arch_rmid_idx_decode(idx, &closid, &rmid); @@ -113,6 +119,20 @@ static inline struct rmid_entry *__rmid_entry(u32 idx) return entry; } +static bool __has_closid_num_dirty_rmid_array(void) +{ + lockdep_assert_held(&rdtgroup_mutex); + + if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + return false; + + /* + * Avoid a race with dom_data_exit() freeing the array under + * rdtgroup_mutex. 
+ */ + return closid_num_dirty_rmid; +} + static void limbo_release_entry(struct rmid_entry *entry) { lockdep_assert_held(&rdtgroup_mutex); @@ -120,7 +140,7 @@ static void limbo_release_entry(struct rmid_entry *entry) rmid_limbo_count--; list_add_tail(&entry->list, &rmid_free_lru); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (__has_closid_num_dirty_rmid_array()) closid_num_dirty_rmid[entry->closid]--; } @@ -159,6 +179,8 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free) break; entry = __rmid_entry(idx); + if (!entry) + break; if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val, arch_mon_ctx)) { @@ -240,7 +262,7 @@ int resctrl_find_cleanest_closid(void) lockdep_assert_held(&rdtgroup_mutex); - if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (!__has_closid_num_dirty_rmid_array()) return -EIO; for (i = 0; i < closids_supported(); i++) { @@ -313,7 +335,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) } rmid_limbo_count++; - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) + if (__has_closid_num_dirty_rmid_array()) closid_num_dirty_rmid[entry->closid]++; } @@ -324,6 +346,10 @@ void free_rmid(u32 closid, u32 rmid) lockdep_assert_held(&rdtgroup_mutex); + /* rmid_ptrs[] not allocated if there are no monitors */ + if (!resctrl_arch_mon_capable()) + return; + /* * Do not allow the default rmid to be free'd. Comparing by index * allows architectures that ignore the closid parameter to avoid an @@ -335,8 +361,10 @@ void free_rmid(u32 closid, u32 rmid) return; entry = __rmid_entry(idx); + if (!entry) + return; - if (resctrl_arch_is_llc_occupancy_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) add_rmid_to_limbo(entry); else list_add_tail(&entry->list, &rmid_free_lru); @@ -346,27 +374,97 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); + struct mbm_state *state; - switch (evtid) { - case QOS_L3_MBM_TOTAL_EVENT_ID: - return &d->mbm_total[idx]; - case QOS_L3_MBM_LOCAL_EVENT_ID: - return &d->mbm_local[idx]; - default: + if (!resctrl_is_mbm_event(evtid)) return NULL; + + state = d->mbm_states[MBM_STATE_IDX(evtid)]; + + return state ? &state[idx] : NULL; +} + +/* + * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp. + * + * Return: + * Valid counter ID on success, or -ENOENT on failure. + */ +static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) +{ + int cntr_id; + + if (!r->mon.mbm_cntr_assignable) + return -ENOENT; + + if (!resctrl_is_mbm_event(evtid)) + return -ENOENT; + + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) { + if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp && + d->cntr_cfg[cntr_id].evtid == evtid) + return cntr_id; } + + return -ENOENT; +} + +/* + * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d. + * Caller must ensure that the specified event is not assigned already. + * + * Return: + * Valid counter ID on success, or -ENOSPC on failure. 
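Taken together, mbm_cntr_get(), mbm_cntr_alloc() and mbm_cntr_free() manage a small per-domain table of counters keyed by (rdtgroup, event). A condensed sketch of the lookup-or-allocate pattern the assignment paths later in this patch build on (illustrative only; the function name is invented for the example):

	/* Reuse the group's counter for the event if one is already assigned,
	 * otherwise take a free slot; -ENOSPC means all num_mbm_cntrs are busy. */
	static int example_get_or_alloc_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
					     struct rdtgroup *rdtgrp)
	{
		int cntr_id;

		cntr_id = mbm_cntr_get(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
		if (cntr_id >= 0)
			return cntr_id;

		return mbm_cntr_alloc(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
	}

The slot is handed back with mbm_cntr_free() when the assignment is dropped, mirroring rdtgroup_alloc_assign_cntr() and rdtgroup_free_unassign_cntr() added further down.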
+ */ +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) +{ + int cntr_id; + + for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) { + if (!d->cntr_cfg[cntr_id].rdtgrp) { + d->cntr_cfg[cntr_id].rdtgrp = rdtgrp; + d->cntr_cfg[cntr_id].evtid = evtid; + return cntr_id; + } + } + + return -ENOSPC; } -static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) +/* + * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d. + */ +static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id) +{ + memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg)); +} + +static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { int cpu = smp_processor_id(); + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; struct rdt_mon_domain *d; + int cntr_id = -ENOENT; struct mbm_state *m; int err, ret; u64 tval = 0; + if (rr->is_mbm_cntr) { + cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid); + if (cntr_id < 0) { + rr->err = -ENOENT; + return -EINVAL; + } + } + if (rr->first) { - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); + if (rr->is_mbm_cntr) + resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid); + else + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); m = get_mbm_state(rr->d, closid, rmid, rr->evtid); if (m) memset(m, 0, sizeof(struct mbm_state)); @@ -377,8 +475,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) /* Reading a single domain, must be on a CPU in that domain. */ if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) return -EINVAL; - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + if (rr->is_mbm_cntr) + rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id, + rr->evtid, &tval); + else + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); if (rr->err) return rr->err; @@ -402,8 +504,12 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { if (d->ci_id != rr->ci->id) continue; - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + if (rr->is_mbm_cntr) + err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, + rr->evtid, &tval); + else + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, + rr->evtid, &tval, rr->arch_mon_ctx); if (!err) { rr->val += tval; ret = 0; @@ -419,8 +525,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) /* * mbm_bw_count() - Update bw count from values previously read by * __mon_event_count(). - * @closid: The closid used to identify the cached mbm_state. - * @rmid: The rmid used to identify the cached mbm_state. + * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify + * the cached mbm_state. * @rr: The struct rmid_read populated by __mon_event_count(). * * Supporting function to calculate the memory bandwidth @@ -428,9 +534,11 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) * __mon_event_count() is compared with the chunks value from the previous * invocation. This must be called once per second to maintain values in MBps. 
*/ -static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) +static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { u64 cur_bw, bytes, cur_bytes; + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; struct mbm_state *m; m = get_mbm_state(rr->d, closid, rmid, rr->evtid); @@ -459,7 +567,7 @@ void mon_event_count(void *info) rdtgrp = rr->rgrp; - ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr); + ret = __mon_event_count(rdtgrp, rr); /* * For Ctrl groups read data from child monitor groups and @@ -470,8 +578,7 @@ void mon_event_count(void *info) if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->closid, entry->mon.rmid, - rr) == 0) + if (__mon_event_count(entry, rr) == 0) ret = 0; } } @@ -602,44 +709,49 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) } static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 closid, u32 rmid, enum resctrl_event_id evtid) + struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rmid_read rr = {0}; rr.r = r; rr.d = d; rr.evtid = evtid; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; + if (resctrl_arch_mbm_cntr_assign_enabled(r)) { + rr.is_mbm_cntr = true; + } else { + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + if (IS_ERR(rr.arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(rr.arch_mon_ctx)); + return; + } } - __mon_event_count(closid, rmid, &rr); + __mon_event_count(rdtgrp, &rr); /* * If the software controller is enabled, compute the * bandwidth for this event id. */ if (is_mba_sc(NULL)) - mbm_bw_count(closid, rmid, &rr); + mbm_bw_count(rdtgrp, &rr); - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); + if (rr.arch_mon_ctx) + resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); } static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 closid, u32 rmid) + struct rdtgroup *rdtgrp) { /* * This is protected from concurrent reads from user as both * the user and overflow handler hold the global mutex. 
*/ - if (resctrl_arch_is_mbm_total_enabled()) - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID); - if (resctrl_arch_is_mbm_local_enabled()) - mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID); } /* @@ -652,6 +764,7 @@ void cqm_handle_limbo(struct work_struct *work) struct rdt_mon_domain *d; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); @@ -666,6 +779,7 @@ void cqm_handle_limbo(struct work_struct *work) } mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); } @@ -699,6 +813,7 @@ void mbm_handle_overflow(struct work_struct *work) struct rdt_resource *r; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); /* @@ -712,11 +827,11 @@ void mbm_handle_overflow(struct work_struct *work) d = container_of(work, struct rdt_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(r, d, prgrp->closid, prgrp->mon.rmid); + mbm_update(r, d, prgrp); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(r, d, crgrp->closid, crgrp->mon.rmid); + mbm_update(r, d, crgrp); if (is_mba_sc(NULL)) update_mba_bw(prgrp, d); @@ -732,6 +847,7 @@ void mbm_handle_overflow(struct work_struct *work) out_unlock: mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); } @@ -752,8 +868,10 @@ void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ /* * When a domain comes online there is no guarantee the filesystem is * mounted. If not, there is no need to catch counter overflow. + * Some architecture may have ~64bit counters, and can ignore overflow. */ - if (!resctrl_mounted || !resctrl_arch_mon_capable()) + if (!resctrl_mounted || !resctrl_arch_mon_capable() || + !resctrl_arch_mon_can_overflow()) return; cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu); dom->mbm_work_cpu = cpu; @@ -815,6 +933,7 @@ static int dom_data_init(struct rdt_resource *r) idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); entry = __rmid_entry(idx); + WARN_ON_ONCE(!entry); list_del(&entry->list); out_unlock: @@ -842,38 +961,819 @@ static void dom_data_exit(struct rdt_resource *r) mutex_unlock(&rdtgroup_mutex); } -static struct mon_evt llc_occupancy_event = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, +/* + * All available events. Architecture code marks the ones that + * are supported by a system using resctrl_enable_mon_event() + * to set .enabled. 
+ */ +struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { + [QOS_L3_OCCUP_EVENT_ID] = { + .name = "llc_occupancy", + .evtid = QOS_L3_OCCUP_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, + [QOS_L3_MBM_TOTAL_EVENT_ID] = { + .name = "mbm_total_bytes", + .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, + [QOS_L3_MBM_LOCAL_EVENT_ID] = { + .name = "mbm_local_bytes", + .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, + .rid = RDT_RESOURCE_L3, + }, }; -static struct mon_evt mbm_total_event = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, +void resctrl_enable_mon_event(enum resctrl_event_id eventid) +{ + if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS)) + return; + if (mon_event_all[eventid].enabled) { + pr_warn("Duplicate enable for event %d\n", eventid); + return; + } + + mon_event_all[eventid].enabled = true; +} + +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) +{ + return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS && + mon_event_all[eventid].enabled; +} + +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid) +{ + return mon_event_all[evtid].evt_cfg; +} + +/** + * struct mbm_transaction - Memory transaction an MBM event can be configured with. + * @name: Name of memory transaction (read, write ...). + * @val: The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to + * represent the memory transaction within an event's configuration. + */ +struct mbm_transaction { + char name[32]; + u32 val; }; -static struct mon_evt mbm_local_event = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, +/* Decoded values for each type of memory transaction. */ +static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = { + {"local_reads", READS_TO_LOCAL_MEM}, + {"remote_reads", READS_TO_REMOTE_MEM}, + {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM}, + {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM}, + {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM}, + {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM}, + {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM}, }; +int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) +{ + struct mon_evt *mevt = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r; + bool sep = false; + int ret = 0, i; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + r = resctrl_arch_get_resource(mevt->rid); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) { + if (mevt->evt_cfg & mbm_transactions[i].val) { + if (sep) + seq_putc(seq, ','); + seq_printf(seq, "%s", mbm_transactions[i].name); + sep = true; + } + } + seq_putc(seq, '\n'); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} + +int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s, + void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + int ret = 0; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret; +} + +ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = 
rdt_kn_parent_priv(of->kn); + bool value; + int ret; + + ret = kstrtobool(buf, &value); + if (ret) + return ret; + + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + r->mon.mbm_assign_on_mkdir = value; + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + + return ret ?: nbytes; +} + /* - * Initialize the event list for the resource. + * mbm_cntr_free_all() - Clear all the counter ID configuration details in the + * domain @d. Called when mbm_assign_mode is changed. + */ +static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs); +} + +/* + * resctrl_reset_rmid_all() - Reset all non-architecture states for all the + * supported RMIDs. + */ +static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +{ + u32 idx_limit = resctrl_arch_system_num_rmid_idx(); + enum resctrl_event_id evt; + int idx; + + for_each_mbm_event_id(evt) { + if (!resctrl_is_mon_event_enabled(evt)) + continue; + idx = MBM_STATE_IDX(evt); + memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit); + } +} + +/* + * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID + * pair in the domain. * - * Note that MBM events are also part of RDT_RESOURCE_L3 resource - * because as per the SDM the total and local memory bandwidth - * are enumerated as part of L3 monitoring. + * Assign the counter if @assign is true else unassign the counter. Reset the + * associated non-architectural state. */ -static void l3_mon_evt_init(struct rdt_resource *r) +static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) { - INIT_LIST_HEAD(&r->evt_list); + struct mbm_state *m; + + resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign); + + m = get_mbm_state(d, closid, rmid, evtid); + if (m) + memset(m, 0, sizeof(*m)); +} + +/* + * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event + * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d. + * + * Return: + * 0 on success, < 0 on failure. + */ +static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int cntr_id; + + /* No action required if the counter is assigned already. */ + cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid); + if (cntr_id >= 0) + return 0; - if (resctrl_arch_is_llc_occupancy_enabled()) - list_add_tail(&llc_occupancy_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_total_enabled()) - list_add_tail(&mbm_total_event.list, &r->evt_list); - if (resctrl_arch_is_mbm_local_enabled()) - list_add_tail(&mbm_local_event.list, &r->evt_list); + cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid); + if (cntr_id < 0) { + rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n", + mevt->name, d->hdr.id); + return cntr_id; + } + + rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true); + + return 0; +} + +/* + * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in + * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is + * NULL; otherwise, assign the counter to the specified domain @d. 
+ * + * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr() + * will fail. The assignment process will abort at the first failure encountered + * during domain traversal, which may result in the event being only partially + * assigned. + * + * Return: + * 0 on success, < 0 on failure. + */ +static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + int ret = 0; + + if (!d) { + list_for_each_entry(d, &r->mon_domains, hdr.list) { + ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt); + if (ret) + return ret; + } + } else { + ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt); + } + + return ret; +} + +/* + * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when + * a new group is created. + * + * Each group can accommodate two counters per domain: one for the total + * event and one for the local event. Assignments may fail due to the limited + * number of counters. However, it is not necessary to fail the group creation + * and thus no failure is returned. Users have the option to modify the + * counter assignments after the group has been created. + */ +void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) || + !r->mon.mbm_assign_on_mkdir) + return; + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + rdtgroup_assign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]); + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + rdtgroup_assign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); +} + +/* + * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration + * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp. + */ +static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int cntr_id; + + cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid); + + /* If there is no cntr_id assigned, nothing to do */ + if (cntr_id < 0) + return; + + rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false); + + mbm_cntr_free(d, cntr_id); +} + +/* + * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with + * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign + * the counters from all the domains if @d is NULL else unassign from @d. + */ +static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, + struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + + if (!d) { + list_for_each_entry(d, &r->mon_domains, hdr.list) + rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt); + } else { + rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt); + } +} + +/* + * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events. + * Called when a group is deleted. 
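A condensed view of the pairing these two helpers form. The mkdir/rmdir call sites live outside this hunk, so the surrounding context is only a sketch of the intended use:

	/* mkdir path: assign total/local counters best-effort; failure does not fail mkdir. */
	rdtgroup_assign_cntrs(rdtgrp);

	/* rmdir path: release whatever counters the group holds in every domain. */
	rdtgroup_unassign_cntrs(rdtgrp);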
+ */ +void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r)) + return; + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + rdtgroup_unassign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]); + + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + rdtgroup_unassign_cntr_event(NULL, rdtgrp, + &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]); +} + +static int resctrl_parse_mem_transactions(char *tok, u32 *val) +{ + u32 temp_val = 0; + char *evt_str; + bool found; + int i; + +next_config: + if (!tok || tok[0] == '\0') { + *val = temp_val; + return 0; + } + + /* Start processing the strings for each memory transaction type */ + evt_str = strim(strsep(&tok, ",")); + found = false; + for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) { + if (!strcmp(mbm_transactions[i].name, evt_str)) { + temp_val |= mbm_transactions[i].val; + found = true; + break; + } + } + + if (!found) { + rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str); + return -EINVAL; + } + + goto next_config; +} + +/* + * rdtgroup_update_cntr_event - Update the counter assignments for the event + * in a group. + * @r: Resource to which update needs to be done. + * @rdtgrp: Resctrl group. + * @evtid: MBM monitor event. + */ +static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp, + enum resctrl_event_id evtid) +{ + struct rdt_mon_domain *d; + int cntr_id; + + list_for_each_entry(d, &r->mon_domains, hdr.list) { + cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid); + if (cntr_id >= 0) + rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid, + rdtgrp->closid, cntr_id, true); + } +} + +/* + * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event + * for all the groups. + * @mevt MBM Monitor event. + */ +static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt) +{ + struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); + struct rdtgroup *prgrp, *crgrp; + + /* + * Find all the groups where the event is assigned and update the + * configuration of existing assignments. 
+ */ + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + rdtgroup_update_cntr_event(r, prgrp, mevt->evtid); + + list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) + rdtgroup_update_cntr_event(r, crgrp, mevt->evtid); + } +} + +ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off) +{ + struct mon_evt *mevt = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r; + u32 evt_cfg = 0; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + r = resctrl_arch_get_resource(mevt->rid); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + ret = resctrl_parse_mem_transactions(buf, &evt_cfg); + if (!ret && mevt->evt_cfg != evt_cfg) { + mevt->evt_cfg = evt_cfg; + resctrl_update_cntr_allrdtgrp(mevt); + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + +int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + bool enabled; + + mutex_lock(&rdtgroup_mutex); + enabled = resctrl_arch_mbm_cntr_assign_enabled(r); + + if (r->mon.mbm_cntr_assignable) { + if (enabled) + seq_puts(s, "[mbm_event]\n"); + else + seq_puts(s, "[default]\n"); + + if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) { + if (enabled) + seq_puts(s, "default\n"); + else + seq_puts(s, "mbm_event\n"); + } + } else { + seq_puts(s, "[default]\n"); + } + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} + +ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *d; + int ret = 0; + bool enable; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!strcmp(buf, "default")) { + enable = 0; + } else if (!strcmp(buf, "mbm_event")) { + if (r->mon.mbm_cntr_assignable) { + enable = 1; + } else { + ret = -EINVAL; + rdt_last_cmd_puts("mbm_event mode is not supported\n"); + goto out_unlock; + } + } else { + ret = -EINVAL; + rdt_last_cmd_puts("Unsupported assign mode\n"); + goto out_unlock; + } + + if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) { + ret = resctrl_arch_mbm_cntr_assign_set(r, enable); + if (ret) + goto out_unlock; + + /* Update the visibility of BMEC related files */ + resctrl_bmec_files_show(r, NULL, !enable); + + /* + * Initialize the default memory transaction values for + * total and local events. + */ + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); + /* Enable auto assignment when switching to "mbm_event" mode */ + if (enable) + r->mon.mbm_assign_on_mkdir = true; + /* + * Reset all the non-achitectural RMID state and assignable counters. 
+ */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + mbm_cntr_free_all(r, d); + resctrl_reset_rmid_all(r, d); + } + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} + +int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *dom; + bool sep = false; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + list_for_each_entry(dom, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs); + sep = true; + } + seq_putc(s, '\n'); + + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return 0; +} + +int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); + struct rdt_mon_domain *dom; + bool sep = false; + u32 cntrs, i; + int ret = 0; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + list_for_each_entry(dom, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + cntrs = 0; + for (i = 0; i < r->mon.num_mbm_cntrs; i++) { + if (!dom->cntr_cfg[i].rdtgrp) + cntrs++; + } + + seq_printf(s, "%d=%u", dom->hdr.id, cntrs); + sep = true; + } + seq_putc(s, '\n'); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret; +} + +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdt_mon_domain *d; + struct rdtgroup *rdtgrp; + struct mon_evt *mevt; + int ret = 0; + bool sep; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out_unlock; + } + + rdt_last_cmd_clear(); + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n"); + ret = -EINVAL; + goto out_unlock; + } + + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid)) + continue; + + sep = false; + seq_printf(s, "%s:", mevt->name); + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (sep) + seq_putc(s, ';'); + + if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0) + seq_printf(s, "%d=_", d->hdr.id); + else + seq_printf(s, "%d=e", d->hdr.id); + + sep = true; + } + seq_putc(s, '\n'); + } + +out_unlock: + rdtgroup_kn_unlock(of->kn); + + return ret; +} + +/* + * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching + * event name. 
+ */ +static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name) +{ + struct mon_evt *mevt; + + for_each_mon_event(mevt) { + if (mevt->rid == r->rid && mevt->enabled && + resctrl_is_mbm_event(mevt->evtid) && + !strcmp(mevt->name, name)) + return mevt; + } + + return NULL; +} + +static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, + struct rdtgroup *rdtgrp, struct mon_evt *mevt) +{ + int ret = 0; + + if (!assign || strlen(assign) != 1) + return -EINVAL; + + switch (*assign) { + case 'e': + ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt); + break; + case '_': + rdtgroup_unassign_cntr_event(d, rdtgrp, mevt); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp, + char *event, char *tok) +{ + struct rdt_mon_domain *d; + unsigned long dom_id = 0; + char *dom_str, *id_str; + struct mon_evt *mevt; + int ret; + + mevt = mbm_get_mon_event_by_name(r, event); + if (!mevt) { + rdt_last_cmd_printf("Invalid event %s\n", event); + return -ENOENT; + } + +next: + if (!tok || tok[0] == '\0') + return 0; + + /* Start processing the strings for each domain */ + dom_str = strim(strsep(&tok, ";")); + + id_str = strsep(&dom_str, "="); + + /* Check for domain id '*' which means all domains */ + if (id_str && *id_str == '*') { + ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt); + if (ret) + rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n", + event, dom_str); + return ret; + } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) { + rdt_last_cmd_puts("Missing domain id\n"); + return -EINVAL; + } + + /* Verify if the dom_id is valid */ + list_for_each_entry(d, &r->mon_domains, hdr.list) { + if (d->hdr.id == dom_id) { + ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt); + if (ret) { + rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n", + event, dom_id, dom_str); + return ret; + } + goto next; + } + } + + rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id); + return -EINVAL; +} + +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + struct rdtgroup *rdtgrp; + char *token, *event; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + rdtgroup_kn_unlock(of->kn); + return -ENOENT; + } + rdt_last_cmd_clear(); + + if (!resctrl_arch_mbm_cntr_assign_enabled(r)) { + rdt_last_cmd_puts("mbm_event mode is not enabled\n"); + rdtgroup_kn_unlock(of->kn); + return -EINVAL; + } + + while ((token = strsep(&buf, "\n")) != NULL) { + /* + * The write command follows the following format: + * ":=" + * Extract the event name first. 
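For illustration, given the parsing above (and assuming domain IDs 0 and 1 exist), a write of the two lines below assigns a counter for mbm_total_bytes in domain 0, unassigns it in domain 1, and assigns mbm_local_bytes counters in every domain ('e' assigns, '_' unassigns, '*' addresses all domains):

	mbm_total_bytes:0=e;1=_
	mbm_local_bytes:*=e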
+ */ + event = strsep(&token, ":"); + + ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token); + if (ret) + break; + } + + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; } /** @@ -900,24 +1800,41 @@ int resctrl_mon_resource_init(void) if (ret) return ret; - l3_mon_evt_init(r); - if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { - mbm_total_event.configurable = true; + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_total_bytes_config", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { - mbm_local_event.configurable = true; + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_local_bytes_config", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); } - if (resctrl_arch_is_mbm_local_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; - else if (resctrl_arch_is_mbm_total_enabled()) + else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; + if (r->mon.mbm_cntr_assignable) { + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); + r->mon.mbm_assign_on_mkdir = true; + resctrl_file_fflags_init("num_mbm_cntrs", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("available_mbm_cntrs", + RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); + resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO | + RFTYPE_RES_CACHE); + resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); + } + return 0; } diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c index 87bbc2605de12..4086e61df3e1c 100644 --- a/fs/resctrl/pseudo_lock.c +++ b/fs/resctrl/pseudo_lock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -694,6 +695,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) int ret = -1; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); if (rdtgrp->flags & RDT_DELETED) { @@ -741,6 +743,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) out: mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return ret; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 77d08229d8550..3c9981f545017 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -16,8 +16,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -86,6 +88,9 @@ enum resctrl_event_id mba_mbps_default_event; static bool resctrl_debug; +/* Enable wacky behaviour that is not supported upstream. 
*/ +DEFINE_STATIC_KEY_FALSE(resctrl_abi_playground); + void rdt_last_cmd_clear(void) { lockdep_assert_held(&rdtgroup_mutex); @@ -123,14 +128,8 @@ void rdt_staged_configs_clear(void) static bool resctrl_is_mbm_enabled(void) { - return (resctrl_arch_is_mbm_total_enabled() || - resctrl_arch_is_mbm_local_enabled()); -} - -static bool resctrl_is_mbm_event(int e) -{ - return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && - e <= QOS_L3_MBM_LOCAL_EVENT_ID); + return (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID) || + resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)); } /* @@ -196,7 +195,7 @@ static int closid_alloc(void) lockdep_assert_held(&rdtgroup_mutex); if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) && - resctrl_arch_is_llc_occupancy_enabled()) { + resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { cleanest_closid = resctrl_find_cleanest_closid(); if (cleanest_closid < 0) return cleanest_closid; @@ -766,10 +765,65 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, return ret; } +static int rdtgroup_move_iommu(int iommu_group_id, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + const struct cred *cred = current_cred(); + struct iommu_group *iommu_group; + int err; + + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + rdt_last_cmd_printf("No permission to move iommu_group %d\n", + iommu_group_id); + return -EPERM; + } + + iommu_group = iommu_group_get_by_id(iommu_group_id); + if (!iommu_group) { + rdt_last_cmd_printf("No matching iommu_group %d\n", + iommu_group_id); + return -ESRCH; + } + + if (rdtgrp->type == RDTMON_GROUP && + !resctrl_arch_match_iommu_closid(iommu_group, + rdtgrp->mon.parent->closid)) { + rdt_last_cmd_puts("Can't move iommu_group to different control group\n"); + err = -EINVAL; + } else { + err = resctrl_arch_set_iommu_closid_rmid(iommu_group, + rdtgrp->closid, + rdtgrp->mon.rmid); + } + + iommu_group_put(iommu_group); + + return err; +} + +static bool string_is_iommu_group(char *buf, int *val) +{ + if (!IS_ENABLED(CONFIG_RESCTRL_IOMMU) || + !static_branch_unlikely(&resctrl_abi_playground)) + return false; + + if (strlen(buf) <= strlen("iommu_group:")) + return false; + + if (strncmp(buf, "iommu_group:", strlen("iommu_group:"))) + return false; + + buf += strlen("iommu_group:"); + + return !kstrtoint(buf, 0, val); +} + static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdtgroup *rdtgrp; + int iommu_group_id; + bool is_iommu; char *pid_str; int ret = 0; pid_t pid; @@ -791,7 +845,10 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, while (buf && buf[0] != '\0' && buf[0] != '\n') { pid_str = strim(strsep(&buf, ",")); - if (kstrtoint(pid_str, 0, &pid)) { + is_iommu = string_is_iommu_group(pid_str, &iommu_group_id); + if (is_iommu) + ret = rdtgroup_move_iommu(iommu_group_id, rdtgrp, of); + else if (kstrtoint(pid_str, 0, &pid)) { rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str); ret = -EINVAL; break; @@ -816,6 +873,42 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, return ret ?: nbytes; } +static bool iommu_matches_rdtgroup(struct iommu_group *group, struct rdtgroup *r) +{ + if (r->type == RDTCTRL_GROUP) + return resctrl_arch_match_iommu_closid(group, r->closid); + + return resctrl_arch_match_iommu_closid_rmid(group, r->closid, + r->mon.rmid); +} + +static void show_rdt_iommu(struct rdtgroup *r, struct seq_file *s) +{ + struct kset *iommu_groups; + struct iommu_group *group; + struct kobject *group_kobj = NULL; + + if 
(!IS_ENABLED(CONFIG_RESCTRL_IOMMU) || + !static_branch_unlikely(&resctrl_abi_playground)) + return; + + iommu_groups = iommu_get_group_kset(); + + while ((group_kobj = kset_get_next_obj(iommu_groups, group_kobj))) { + /* iommu_group_get_from_kobj() wants to drop a reference */ + kobject_get(group_kobj); + + group = iommu_group_get_from_kobj(group_kobj); + if (!group) + continue; + + if (iommu_matches_rdtgroup(group, r)) + seq_printf(s, "iommu_group:%s\n", group_kobj->name); + } + + kset_put(iommu_groups); +} + static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) { struct task_struct *p, *t; @@ -830,6 +923,8 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) } } rcu_read_unlock(); + + show_rdt_iommu(r, s); } static int rdtgroup_tasks_show(struct kernfs_open_file *of, @@ -981,7 +1076,7 @@ static int rdt_last_cmd_status_show(struct kernfs_open_file *of, return 0; } -static void *rdt_kn_parent_priv(struct kernfs_node *kn) +void *rdt_kn_parent_priv(struct kernfs_node *kn) { /* * The parent pointer is only valid within RCU section since it can be @@ -1004,9 +1099,8 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r)); + seq_printf(seq, "%x\n", resctrl_get_schema_default_ctrl(s)); return 0; } @@ -1062,6 +1156,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, u32 ctrl_val; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { @@ -1122,6 +1217,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } seq_putc(seq, '\n'); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return 0; } @@ -1130,9 +1226,8 @@ static int rdt_min_bw_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.min_bw); + seq_printf(seq, "%u\n", s->membw.min_bw); return 0; } @@ -1141,7 +1236,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - seq_printf(seq, "%d\n", r->num_rmid); + seq_printf(seq, "%d\n", r->mon.num_rmid); return 0; } @@ -1152,9 +1247,12 @@ static int rdt_mon_features_show(struct kernfs_open_file *of, struct rdt_resource *r = rdt_kn_parent_priv(of->kn); struct mon_evt *mevt; - list_for_each_entry(mevt, &r->evt_list, list) { + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled) + continue; seq_printf(seq, "%s\n", mevt->name); - if (mevt->configurable) + if (mevt->configurable && + !resctrl_arch_mbm_cntr_assign_enabled(r)) seq_printf(seq, "%s_config\n", mevt->name); } @@ -1165,9 +1263,8 @@ static int rdt_bw_gran_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); - struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.bw_gran); + seq_printf(seq, "%u\n", s->membw.bw_gran); return 0; } @@ -1177,7 +1274,7 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of, struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - seq_printf(seq, "%u\n", r->membw.delay_linear); + seq_printf(seq, "%u\n", r->mba.delay_linear); return 0; } @@ -1195,7 +1292,7 @@ static int rdt_thread_throttle_mode_show(struct 
kernfs_open_file *of, struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); struct rdt_resource *r = s->res; - switch (r->membw.throttle_mode) { + switch (r->mba.throttle_mode) { case THREAD_THROTTLE_PER_THREAD: seq_puts(seq, "per-thread\n"); return 0; @@ -1507,7 +1604,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct cacheinfo *ci; int num_b; - if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE)) + if (WARN_ON_ONCE(r->schema_fmt != RESCTRL_SCHEMA_BITMAP)) return size; num_b = bitmap_weight(&cbm, r->cache.cbm_len); @@ -1530,7 +1627,7 @@ bool is_mba_sc(struct rdt_resource *r) if (r->rid != RDT_RESOURCE_MBA) return false; - return r->membw.mba_sc; + return r->mba.mba_sc; } /* @@ -1594,11 +1691,11 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, ctrl = resctrl_arch_get_config(r, d, closid, type); - if (r->rid == RDT_RESOURCE_MBA || - r->rid == RDT_RESOURCE_SMBA) - size = ctrl; - else + + if (schema->schema_fmt == RESCTRL_SCHEMA_BITMAP) size = rdtgroup_cbm_to_size(r, d, ctrl); + else + size = ctrl; } seq_printf(s, "%d=%u", d->hdr.id, size); sep = true; @@ -1625,6 +1722,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid bool sep = false; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); list_for_each_entry(dom, &r->mon_domains, hdr.list) { @@ -1643,6 +1741,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid seq_puts(s, "\n"); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return 0; @@ -1668,6 +1767,30 @@ static int mbm_local_bytes_config_show(struct kernfs_open_file *of, return 0; } +static int resctrl_schema_format_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + seq_puts(seq, "bitmap\n"); + break; + case RESCTRL_SCHEMA_PERCENT: + seq_puts(seq, "percentage\n"); + break; + case RESCTRL_SCHEMA_MBPS: + seq_puts(seq, "mbps\n"); + break; + /* The way these schema behave isn't discoverable from resctrl */ + case RESCTRL_SCHEMA__AMD_MBA: + seq_puts(seq, "platform\n"); + break; + } + + return 0; +} + static void mbm_config_write_domain(struct rdt_resource *r, struct rdt_mon_domain *d, u32 evtid, u32 val) { @@ -1735,9 +1858,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) } /* Value from user cannot be more than the supported set of events */ - if ((val & r->mbm_cfg_mask) != val) { + if ((val & r->mon.mbm_cfg_mask) != val) { rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n", - r->mbm_cfg_mask); + r->mon.mbm_cfg_mask); return -EINVAL; } @@ -1763,6 +1886,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, return -EINVAL; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_last_cmd_clear(); @@ -1772,6 +1896,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return ret ?: nbytes; @@ -1789,6 +1914,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, return -EINVAL; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_last_cmd_clear(); @@ -1798,11 +1924,50 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID); 
mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return ret ?: nbytes; } +/* + * resctrl_bmec_files_show() — Controls the visibility of BMEC-related resctrl + * files. When @show is true, the files are displayed; when false, the files + * are hidden. + * Don't treat kernfs_find_and_get failure as an error, since this function may + * be called regardless of whether BMEC is supported or the event is enabled. + */ +void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn, + bool show) +{ + struct kernfs_node *kn_config, *mon_kn = NULL; + char name[32]; + + if (!l3_mon_kn) { + sprintf(name, "%s_MON", r->name); + mon_kn = kernfs_find_and_get(kn_info, name); + if (!mon_kn) + return; + l3_mon_kn = mon_kn; + } + + kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_total_bytes_config"); + if (kn_config) { + kernfs_show(kn_config, show); + kernfs_put(kn_config); + } + + kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_local_bytes_config"); + if (kn_config) { + kernfs_show(kn_config, show); + kernfs_put(kn_config); + } + + /* Release the reference only if it was acquired */ + if (mon_kn) + kernfs_put(mon_kn); +} + /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { @@ -1812,6 +1977,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_last_cmd_status_show, .fflags = RFTYPE_TOP_INFO, }, + { + .name = "mbm_assign_on_mkdir", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_mbm_assign_on_mkdir_show, + .write = resctrl_mbm_assign_on_mkdir_write, + }, { .name = "num_closids", .mode = 0444, @@ -1826,6 +1998,12 @@ static struct rftype res_common_files[] = { .seq_show = rdt_mon_features_show, .fflags = RFTYPE_MON_INFO, }, + { + .name = "available_mbm_cntrs", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_available_mbm_cntrs_show, + }, { .name = "num_rmids", .mode = 0444, @@ -1840,6 +2018,19 @@ static struct rftype res_common_files[] = { .seq_show = rdt_default_ctrl_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, + { + .name = "num_mbm_cntrs", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_num_mbm_cntrs_show, + }, + { + .name = "bitmap_mask", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_default_ctrl_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_BITMAP, + }, { .name = "min_cbm_bits", .mode = 0444, @@ -1847,6 +2038,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_min_cbm_bits_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE, }, + { + .name = "bitmaps_min_bits", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_cbm_bits_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_BITMAP, + }, { .name = "shareable_bits", .mode = 0444, @@ -1868,6 +2066,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_min_bw_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, + { + .name = "percent_min", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_min_bw_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_PERCENT, + }, { .name = "bandwidth_gran", .mode = 0444, @@ -1875,6 +2080,13 @@ static struct rftype res_common_files[] = { .seq_show = rdt_bw_gran_show, .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB, }, + { + .name = "percent_gran", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_bw_gran_show, + .fflags = RFTYPE_CTRL_INFO | RFTYPE_SCHEMA_PERCENT, + }, { .name = "delay_linear", .mode = 0444, @@ -1915,6 
+2127,28 @@ static struct rftype res_common_files[] = { .seq_show = mbm_local_bytes_config_show, .write = mbm_local_bytes_config_write, }, + { + .name = "event_filter", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = event_filter_show, + .write = event_filter_write, + }, + { + .name = "mbm_L3_assignments", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_L3_assignments_show, + .write = mbm_L3_assignments_write, + }, + { + .name = "mbm_assign_mode", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_mbm_assign_mode_show, + .write = resctrl_mbm_assign_mode_write, + .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE, + }, { .name = "cpus", .mode = 0644, @@ -1991,6 +2225,14 @@ static struct rftype res_common_files[] = { .seq_show = rdtgroup_closid_show, .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG, }, + { + .name = "schema_format", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_schema_format_show, + .fflags = RFTYPE_CTRL_INFO, + }, + }; static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) @@ -2047,13 +2289,13 @@ static void thread_throttle_mode_init(void) r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA); if (r_mba->alloc_capable && - r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) - throttle_mode = r_mba->membw.throttle_mode; + r_mba->mba.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_mba->mba.throttle_mode; r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA); if (r_smba->alloc_capable && - r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED) - throttle_mode = r_smba->membw.throttle_mode; + r_smba->mba.throttle_mode != THREAD_THROTTLE_UNDEFINED) + throttle_mode = r_smba->mba.throttle_mode; if (throttle_mode == THREAD_THROTTLE_UNDEFINED) return; @@ -2168,10 +2410,48 @@ int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name, return ret; } +static int resctrl_mkdir_event_configs(struct rdt_resource *r, struct kernfs_node *l3_mon_kn) +{ + struct kernfs_node *kn_subdir, *kn_subdir2; + struct mon_evt *mevt; + int ret; + + kn_subdir = kernfs_create_dir(l3_mon_kn, "event_configs", l3_mon_kn->mode, NULL); + if (IS_ERR(kn_subdir)) + return PTR_ERR(kn_subdir); + + ret = rdtgroup_kn_set_ugid(kn_subdir); + if (ret) + return ret; + + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid)) + continue; + + kn_subdir2 = kernfs_create_dir(kn_subdir, mevt->name, kn_subdir->mode, mevt); + if (IS_ERR(kn_subdir2)) { + ret = PTR_ERR(kn_subdir2); + goto out; + } + + ret = rdtgroup_kn_set_ugid(kn_subdir2); + if (ret) + goto out; + + ret = rdtgroup_add_files(kn_subdir2, RFTYPE_ASSIGN_CONFIG); + if (ret) + break; + } + +out: + return ret; +} + static int rdtgroup_mkdir_info_resdir(void *priv, char *name, unsigned long fflags) { struct kernfs_node *kn_subdir; + struct rdt_resource *r; int ret; kn_subdir = kernfs_create_dir(kn_info, name, @@ -2184,8 +2464,25 @@ static int rdtgroup_mkdir_info_resdir(void *priv, char *name, return ret; ret = rdtgroup_add_files(kn_subdir, fflags); - if (!ret) - kernfs_activate(kn_subdir); + if (ret) + return ret; + + if ((fflags & RFTYPE_MON_INFO) == RFTYPE_MON_INFO) { + r = priv; + if (r->mon.mbm_cntr_assignable) { + ret = resctrl_mkdir_event_configs(r, kn_subdir); + if (ret) + return ret; + /* + * Hide BMEC related files if mbm_event mode + * is enabled. 
+ */ + if (resctrl_arch_mbm_cntr_assign_enabled(r)) + resctrl_bmec_files_show(r, kn_subdir, false); + } + } + + kernfs_activate(kn_subdir); return ret; } @@ -2201,7 +2498,35 @@ static unsigned long fflags_from_resource(struct rdt_resource *r) return RFTYPE_RES_MB; } - return WARN_ON_ONCE(1); + return 0; +} + +static u32 fflags_from_schema(struct resctrl_schema *s) +{ + struct rdt_resource *r = s->res; + u32 fflags = 0; + + /* Some resources are configured purely from their rid */ + fflags |= fflags_from_resource(r); + if (fflags) + return fflags; + + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + fflags |= RFTYPE_SCHEMA_BITMAP; + break; + case RESCTRL_SCHEMA_PERCENT: + fflags |= RFTYPE_SCHEMA_PERCENT; + break; + case RESCTRL_SCHEMA_MBPS: + fflags |= RFTYPE_SCHEMA_MBPS; + break; + case RESCTRL_SCHEMA__AMD_MBA: + /* No standard files are exposed */ + break; + } + + return fflags; } static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) @@ -2224,7 +2549,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) /* loop over enabled controls, these are all alloc_capable */ list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; - fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO; + fflags = fflags_from_schema(s) | RFTYPE_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags); if (ret) goto out_destroy; @@ -2281,7 +2606,7 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, static inline bool is_mba_linear(void) { - return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear; + return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->mba.delay_linear; } static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d) @@ -2339,7 +2664,7 @@ static int set_mba_sc(bool mba_sc) if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) return -EINVAL; - r->membw.mba_sc = mba_sc; + r->mba.mba_sc = mba_sc; rdtgroup_default.mba_mbps_event = mba_mbps_default_event; @@ -2411,6 +2736,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) rdtgroup_kn_get(rdtgrp, kn); cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); /* Was this group deleted while we waited? */ @@ -2428,6 +2754,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) return; mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); rdtgroup_kn_put(rdtgrp, kn); @@ -2441,6 +2768,7 @@ static void rdt_disable_ctx(void) { resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); + resctrl_arch_set_mb_uses_numa_nid(false); set_mba_sc(false); resctrl_debug = false; @@ -2471,8 +2799,17 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) if (ctx->enable_debug) resctrl_debug = true; + if (ctx->mb_uses_numa_nid) { + ret = resctrl_arch_set_mb_uses_numa_nid(true); + if (ret) + goto out_debug; + } + return 0; +out_debug: + resctrl_debug = false; + set_mba_sc(false); out_cdpl3: resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); out_cdpl2: @@ -2528,11 +2865,28 @@ static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type if (cl > max_name_width) max_name_width = cl; - switch (r->schema_fmt) { + s->schema_fmt = r->schema_fmt; + s->membw = r->membw; + + /* + * When mba_sc() is enabled the format used by user space is different + * to that expected by hardware. The conversion is done by + * update_mba_bw(). 
+ */ + if (is_mba_sc(r)) { + s->schema_fmt = RESCTRL_SCHEMA_MBPS; + s->membw.min_bw = 0; + s->membw.max_bw = MBA_MAX_MBPS; + s->membw.bw_gran = 1; + } + + switch (s->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: s->fmt_str = "%d=%x"; break; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: s->fmt_str = "%d=%u"; break; } @@ -2581,6 +2935,42 @@ static void schemata_list_destroy(void) } } +static void hack_file_mode(const char *name, u16 mode) +{ + struct rftype *rfts, *rft; + int len; + + mutex_lock(&rdtgroup_mutex); + + rfts = res_common_files; + len = ARRAY_SIZE(res_common_files); + + for (rft = rfts; rft < rfts + len; rft++) { + if (!strcmp(rft->name, name)) + rft->mode = mode; + } + + mutex_unlock(&rdtgroup_mutex); +} + +static void enable_abi_playground(void) +{ + static_key_enable(&resctrl_abi_playground.key); + + /* Make the tasks file read only */ + if (IS_ENABLED(CONFIG_CGROUP_RESCTRL)) + hack_file_mode("tasks", 0444); +} + +static void disable_abi_playground(void) +{ + static_key_disable(&resctrl_abi_playground.key); + + /* Make the tasks file read/write only */ + if (IS_ENABLED(CONFIG_CGROUP_RESCTRL)) + hack_file_mode("tasks", 0644); +} + static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); @@ -2589,7 +2979,11 @@ static int rdt_get_tree(struct fs_context *fc) struct rdt_resource *r; int ret; + if (ctx->enable_abi_playground) + enable_abi_playground(); + cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); /* * resctrl file system can only be mounted once. @@ -2637,6 +3031,8 @@ static int rdt_get_tree(struct fs_context *fc) if (ret < 0) goto out_info; + rdtgroup_assign_cntrs(&rdtgroup_default); + ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) @@ -2675,8 +3071,10 @@ static int rdt_get_tree(struct fs_context *fc) if (resctrl_arch_mon_capable()) kernfs_remove(kn_mondata); out_mongrp: - if (resctrl_arch_mon_capable()) + if (resctrl_arch_mon_capable()) { + rdtgroup_unassign_cntrs(&rdtgroup_default); kernfs_remove(kn_mongrp); + } out_info: kernfs_remove(kn_info); out_closid_exit: @@ -2690,6 +3088,7 @@ static int rdt_get_tree(struct fs_context *fc) out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); return ret; } @@ -2699,14 +3098,24 @@ enum rdt_param { Opt_cdpl2, Opt_mba_mbps, Opt_debug, + Opt_mb_uses_numa_nid, + Opt_not_abi_playground, nr__rdt_params }; static const struct fs_parameter_spec rdt_fs_parameters[] = { - fsparam_flag("cdp", Opt_cdp), - fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_MBps", Opt_mba_mbps), - fsparam_flag("debug", Opt_debug), + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_MBps", Opt_mba_mbps), + fsparam_flag("debug", Opt_debug), + fsparam_flag("mb_uses_numa_nid", Opt_mb_uses_numa_nid), + + /* + * Some of MPAM's out of tree code exposes things through resctrl + * that need much more discussion before they are considered for + * mainline. Add a mount option that can be used to hide these crimes. 
+ */ + fsparam_flag("this_is_not_abi", Opt_not_abi_playground), {} }; @@ -2737,6 +3146,12 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_debug: ctx->enable_debug = true; return 0; + case Opt_mb_uses_numa_nid: + ctx->mb_uses_numa_nid = true; + return 0; + case Opt_not_abi_playground: + ctx->enable_abi_playground = true; + return 0; } return -EINVAL; @@ -2822,6 +3237,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) head = &rdtgrp->mon.crdtgrp_list; list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { + rdtgroup_unassign_cntrs(sentry); free_rmid(sentry->closid, sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); @@ -2862,6 +3278,8 @@ static void rmdir_all_sub(void) cpumask_or(&rdtgroup_default.cpu_mask, &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); kernfs_remove(rdtgrp->kn); @@ -2946,6 +3364,7 @@ static void resctrl_fs_teardown(void) return; rmdir_all_sub(); + rdtgroup_unassign_cntrs(&rdtgroup_default); mon_put_kn_priv(); rdt_pseudo_lock_release(); rdtgroup_default.mode = RDT_MODE_SHAREABLE; @@ -2959,6 +3378,7 @@ static void rdt_kill_sb(struct super_block *sb) struct rdt_resource *r; cpus_read_lock(); + get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_disable_ctx(); @@ -2975,7 +3395,11 @@ static void rdt_kill_sb(struct super_block *sb) resctrl_mounted = false; kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); + + if (static_branch_unlikely(&resctrl_abi_playground)) + disable_abi_playground(); } static struct file_system_type rdt_fs_type = { @@ -3057,10 +3481,9 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, struct mon_evt *mevt; int ret, domid; - if (WARN_ON(list_empty(&r->evt_list))) - return -EPERM; - - list_for_each_entry(mevt, &r->evt_list, list) { + for_each_mon_event(mevt) { + if (mevt->rid != r->rid || !mevt->enabled) + continue; domid = do_sum ? 
d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) @@ -3372,7 +3795,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid) } cfg = &d->staged_config[CDP_NONE]; - cfg->new_ctrl = resctrl_get_default_ctrl(r); + cfg->new_ctrl = resctrl_get_resource_default_ctrl(r); cfg->have_new_ctrl = true; } } @@ -3427,9 +3850,12 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) } rdtgrp->mon.rmid = ret; + rdtgroup_assign_cntrs(rdtgrp); + ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); + rdtgroup_unassign_cntrs(rdtgrp); free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); return ret; } @@ -3439,8 +3865,10 @@ static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp) static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) { - if (resctrl_arch_mon_capable()) + if (resctrl_arch_mon_capable()) { + rdtgroup_unassign_cntrs(rgrp); free_rmid(rgrp->closid, rgrp->mon.rmid); + } } /* @@ -3716,6 +4144,9 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) update_closid_rmid(tmpmask, NULL); rdtgrp->flags = RDT_DELETED; + + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); /* @@ -3763,6 +4194,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); update_closid_rmid(tmpmask, NULL); + rdtgroup_unassign_cntrs(rdtgrp); + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid); closid_free(rdtgrp->closid); @@ -3973,6 +4406,12 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_debug) seq_puts(seq, ",debug"); + if (resctrl_arch_get_mb_uses_numa_nid()) + seq_puts(seq, ",mb_uses_numa_nid"); + + if (static_branch_unlikely(&resctrl_abi_playground)) + seq_puts(seq, ",this_is_not_abi"); + return 0; } @@ -4022,9 +4461,14 @@ static void rdtgroup_setup_default(void) static void domain_destroy_mon_state(struct rdt_mon_domain *d) { + int idx; + + kfree(d->cntr_cfg); bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); + for_each_mbm_idx(idx) { + kfree(d->mbm_states[idx]); + d->mbm_states[idx] = NULL; + } } void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) @@ -4050,7 +4494,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); - if (resctrl_arch_is_llc_occupancy_enabled() && has_busy_rmid(d)) { + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) { /* * When a package is going down, forcefully * decrement rmid->ebusy. 
There is no way to know @@ -4084,32 +4528,41 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - size_t tsize; + size_t tsize = sizeof(*d->mbm_states[0]); + enum resctrl_event_id eventid; + int idx; - if (resctrl_arch_is_llc_occupancy_enabled()) { + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); if (!d->rmid_busy_llc) return -ENOMEM; } - if (resctrl_arch_is_mbm_total_enabled()) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); - return -ENOMEM; - } + + for_each_mbm_event_id(eventid) { + if (!resctrl_is_mon_event_enabled(eventid)) + continue; + idx = MBM_STATE_IDX(eventid); + d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL); + if (!d->mbm_states[idx]) + goto cleanup; } - if (resctrl_arch_is_mbm_local_enabled()) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - return -ENOMEM; - } + + if (resctrl_is_mbm_enabled() && r->mon.mbm_cntr_assignable) { + tsize = sizeof(*d->cntr_cfg); + d->cntr_cfg = kcalloc(r->mon.num_mbm_cntrs, tsize, GFP_KERNEL); + if (!d->cntr_cfg) + goto cleanup; } return 0; +cleanup: + bitmap_free(d->rmid_busy_llc); + for_each_mbm_idx(idx) { + kfree(d->mbm_states[idx]); + d->mbm_states[idx] = NULL; + } + + return -ENOMEM; } int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) @@ -4144,7 +4597,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) RESCTRL_PICK_ANY_CPU); } - if (resctrl_arch_is_llc_occupancy_enabled()) + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); /* @@ -4219,7 +4672,7 @@ void resctrl_offline_cpu(unsigned int cpu) cancel_delayed_work(&d->mbm_over); mbm_setup_overflow_handler(d, 0, cpu); } - if (resctrl_arch_is_llc_occupancy_enabled() && + if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && cpu == d->cqm_work_cpu && has_busy_rmid(d)) { cancel_delayed_work(&d->cqm_limbo); cqm_setup_limbo_handler(d, 0, cpu); @@ -4336,12 +4789,14 @@ static bool resctrl_online_domains_exist(void) void resctrl_exit(void) { cpus_read_lock(); + get_online_mems(); WARN_ON_ONCE(resctrl_online_domains_exist()); mutex_lock(&rdtgroup_mutex); resctrl_fs_teardown(); mutex_unlock(&rdtgroup_mutex); + put_online_mems(); cpus_read_unlock(); debugfs_remove_recursive(debugfs_resctrl); diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h index 842f932e2c2bc..2be59da4fe4fb 100644 --- a/include/acpi/acrestyp.h +++ b/include/acpi/acrestyp.h @@ -421,6 +421,7 @@ ACPI_RESOURCE_SERIAL_COMMON}; struct acpi_resource_i2c_serialbus { ACPI_RESOURCE_SERIAL_COMMON u8 access_mode; + u8 lvr; u16 slave_address; u32 connection_speed; }; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 20f3d62e7a16a..9fc28fb1890be 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -39,7 +39,8 @@ /* CPPC_AUTO_ACT_WINDOW_MAX_SIG is 127, so 128 and 129 will decay to 127 when writing */ #define CPPC_AUTO_ACT_WINDOW_SIG_CARRY_THRESH 129 -#define CPPC_ENERGY_PERF_MAX (0xFF) +#define CPPC_EPP_PERFORMANCE_PREF 0x00 +#define CPPC_EPP_ENERGY_EFFICIENCY_PREF 0xFF /* Each register has the folowing format. 
*/ struct cpc_reg { @@ -119,7 +120,6 @@ struct cppc_perf_caps { u32 lowest_nonlinear_perf; u32 lowest_freq; u32 nominal_freq; - u32 energy_perf; bool auto_sel; }; @@ -151,6 +151,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); +extern int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -172,6 +173,12 @@ extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window); extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window); extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int cppc_get_min_perf(int cpu, u64 *min_perf); +extern int cppc_set_min_perf(int cpu, u64 min_perf); +extern int cppc_get_max_perf(int cpu, u64 *max_perf); +extern int cppc_set_max_perf(int cpu, u64 max_perf); +extern int cppc_get_perf_limited(int cpu, u64 *perf_limited); +extern int cppc_set_perf_limited(int cpu, u64 perf_limited); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); extern int amd_detect_prefcore(bool *detected); @@ -192,6 +199,10 @@ static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ { return -EOPNOTSUPP; } +static inline int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + return -EOPNOTSUPP; +} static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -EOPNOTSUPP; @@ -260,6 +271,30 @@ static inline int cppc_set_auto_sel(int cpu, bool enable) { return -EOPNOTSUPP; } +static inline int cppc_get_min_perf(int cpu, u64 *min_perf) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_min_perf(int cpu, u64 min_perf) +{ + return -EOPNOTSUPP; +} +static inline int cppc_get_max_perf(int cpu, u64 *max_perf) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_max_perf(int cpu, u64 max_perf) +{ + return -EOPNOTSUPP; +} +static inline int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_perf_limited(int cpu, u64 perf_limited) +{ + return -EOPNOTSUPP; +} static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) { return -ENODEV; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 219ef1b5970fc..42cbeaba2a510 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -8,6 +8,7 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#include #include #include /* for struct resource */ #include @@ -221,6 +222,17 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); +static inline struct acpi_table_header *acpi_get_table_ret(char *signature, u32 instance) +{ + struct acpi_table_header *table; + int status = acpi_get_table(signature, instance, &table); + + if (ACPI_FAILURE(status)) + return ERR_PTR(-ENOENT); + return table; +} +DEFINE_FREE(acpi_table, struct acpi_table_header *, if (!IS_ERR_OR_NULL(_T)) acpi_put_table(_T)) + int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, @@ -1541,6 +1553,9 @@ int 
find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); +int find_acpi_cache_level_from_id(u32 cache_id); +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1562,6 +1577,17 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } +static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, + cpumask_t *cpus) { } +static inline int find_acpi_cache_level_from_id(u32 cache_id) +{ + return -ENOENT; +} +static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, + cpumask_t *cpus) +{ + return -ENOENT; +} #endif void acpi_arch_init(void); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index d72d6e5aa2002..0c2a8b846c20c 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -89,6 +89,21 @@ void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); void freq_inv_set_max_ratio(int cpu, u64 max_rate); -#endif + +/* + * Architectures like ARM64 don't have reliable architectural way to get SMT + * information and depend on the firmware (ACPI/OF) report. Non-SMT core won't + * initialize thread_id so we can use this to detect the SMT implementation. + */ +static inline bool topology_core_has_smt(int cpu) +{ + return cpu_topology[cpu].thread_id != -1; +} + +#else + +static inline bool topology_core_has_smt(int cpu) { return false; } + +#endif /* CONFIG_GENERIC_ARCH_TOPOLOGY */ #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h new file mode 100644 index 0000000000000..aa7d6e1854741 --- /dev/null +++ b/include/linux/arm_mpam.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Arm Ltd. */ + +#ifndef __LINUX_ARM_MPAM_H +#define __LINUX_ARM_MPAM_H + +#include +#include +#include +#include + +#define GLOBAL_AFFINITY ~0 + +struct mpam_msc; + +enum mpam_msc_iface { + MPAM_IFACE_MMIO, /* a real MPAM MSC */ + MPAM_IFACE_PCC, /* a fake MPAM MSC */ + MPAM_IFACE_SCMI, /* through a firmware interface */ +}; + +enum mpam_class_types { + MPAM_CLASS_CACHE, /* Well known caches, e.g. L2 */ + MPAM_CLASS_MEMORY, /* Main memory */ + MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */ +}; + +#ifdef CONFIG_ACPI_MPAM +/* Parse the ACPI description of resources entries for this MSC. */ +int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc); + +int acpi_mpam_count_msc(void); +#else +static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc) +{ + return -EINVAL; +} + +static inline int acpi_mpam_count_msc(void) { return -EINVAL; } +#endif + +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, u32 component_id); + +struct resctrl_schema; +static inline u32 resctrl_arch_round_bw(u32 val, + const struct resctrl_schema *s __always_unused) +{ + /* + * Do nothing: for MPAM, resctrl_arch_update_one() has the necessary + * context to round the incoming value correctly. 
+ */ + return val; +} + +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + return val; +} + +bool resctrl_arch_alloc_capable(void); +bool resctrl_arch_mon_capable(void); +bool resctrl_arch_mon_can_overflow(void); + +void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); +void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid); +void resctrl_arch_sched_in(struct task_struct *tsk); +bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid); +bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); +void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); +u32 resctrl_arch_system_num_rmid_idx(void); + +struct rdt_resource; +void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); +void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); + +bool resctrl_arch_get_mb_uses_numa_nid(void); +int resctrl_arch_set_mb_uses_numa_nid(bool enabled); + +/* + * The CPU configuration for MPAM is cheap to write, and is only written if it + * has changed. No need for fine grained enables. + */ +static inline void resctrl_arch_enable_mon(void) { } +static inline void resctrl_arch_disable_mon(void) { } +static inline void resctrl_arch_enable_alloc(void) { } +static inline void resctrl_arch_disable_alloc(void) { } + +/** + * mpam_register_requestor() - Register a requestor with the MPAM driver + * @partid_max: The maximum PARTID value the requestor can generate. + * @pmg_max: The maximum PMG value the requestor can generate. + * + * Registers a requestor with the MPAM driver to ensure the chosen system-wide + * minimum PARTID and PMG values will allow the requestors features to be used. + * + * Returns an error if the registration is too late, and a larger PARTID/PMG + * value has been advertised to user-space. In this case the requestor should + * not use its MPAM features. Returns 0 on success. + */ +int mpam_register_requestor(u16 partid_max, u8 pmg_max); + +#endif /* __LINUX_ARM_MPAM_H */ diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index c8f4f0a0b874e..cfd45a5a46ae4 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -112,6 +112,7 @@ int acpi_get_cache_info(unsigned int cpu, #endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); +u32 cache_of_calculate_id(struct device_node *np); /* * Get the cacheinfo structure for the cache associated with @cpu at @@ -147,6 +148,21 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level) return ci ? ci->id : -1; } +/** + * get_cpu_cacheinfo_size() - Get the size of the cache. + * @cpu: The cpu that is associated with the cache. + * @level: The level of the cache as seen by @cpu. + * + * cpuhp lock must be held. + * Returns the cache-size on success, or 0 for an error. + */ +static inline unsigned int get_cpu_cacheinfo_size(int cpu, int level) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level); + + return ci ? 
ci->size : 0; +} + #if defined(CONFIG_ARM64) || defined(CONFIG_ARM) #define use_arch_cache_info() (true) #else diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 7cecda29447e3..855cc18833403 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -202,6 +202,8 @@ void debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); void debugfs_create_str(const char *name, umode_t mode, struct dentry *parent, char **value); +void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -409,6 +411,10 @@ static inline void debugfs_create_str(const char *name, umode_t mode, char **value) { } +static inline void debugfs_create_cpumask(const char *name, umode_t mode, + struct dentry *parent, struct cpumask *value) +{ } + static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) diff --git a/include/linux/i3c/ccc.h b/include/linux/i3c/ccc.h index ad59a4ae60d12..a145d766ab6f7 100644 --- a/include/linux/i3c/ccc.h +++ b/include/linux/i3c/ccc.h @@ -32,6 +32,7 @@ #define I3C_CCC_DEFSLVS I3C_CCC_ID(0x8, true) #define I3C_CCC_ENTTM I3C_CCC_ID(0xb, true) #define I3C_CCC_ENTHDR(x) I3C_CCC_ID(0x20 + (x), true) +#define I3C_CCC_SETAASA I3C_CCC_ID(0x29, true) /* Unicast-only commands */ #define I3C_CCC_SETDASA I3C_CCC_ID(0x7, false) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 043f5c7ff398f..636f79badd036 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -174,10 +174,20 @@ struct i3c_device_ibi_info { * assigned a dynamic address by the master. Will be used during * bus initialization to assign it a specific dynamic address * before starting DAA (Dynamic Address Assignment) + * @static_addr_method: Bitmap describing which methods of Dynamic Address + * Assignment from a Static Address are supported by this I3C Target. + * A value of 1'b1 in a bit position indicates that the Bus Controller + * supports that method, and a value of 1'b0 indicates that the Bus + * Controller does not support that method. + * Bit 0: SETDASA CCC (Direct) + * Bit 1: SETAASA CCC (Broadcast) + * Bit 2: Other CCC (vendor / standards extension) + * All other bits are reserved. * @pid: I3C Provisioned ID exposed by the device. This is a unique identifier * that may be used to attach boardinfo to i3c_dev_desc when the device * does not have a static address - * @of_node: optional DT node in case the device has been described in the DT + * @fwnode: Firmware node (DT or ACPI) in case the device has been + * described in firmware * * This structure is used to attach board-level information to an I3C device. * Not all I3C devices connected on the bus will have a boardinfo. It's only @@ -188,8 +198,9 @@ struct i3c_dev_boardinfo { struct list_head node; u8 init_dyn_addr; u8 static_addr; + u8 static_addr_method; u64 pid; - struct device_node *of_node; + struct fwnode_handle *fwnode; }; /** @@ -509,6 +520,15 @@ struct i3c_master_controller_ops { * @boardinfo.i2c: list of I2C boardinfo objects * @boardinfo: board-level information attached to devices connected on the bus * @bus: I3C bus exposed by this master + * @addr_method: Bitmap describing which methods of Address Assignment required + * to be run for discovering all the devices on the bus. 
+ * A value of 1'b1 in a bit position indicates that the Bus Controller + * supports that method, and a value of 1'b0 indicates that the Bus + * Controller does not support that method. + * Bit 0: SETDASA CCC (Direct) + * Bit 1: SETAASA CCC (Broadcast) + * Bit 2: Other CCC (vendor / standards extension) + * All other bits are reserved. * @wq: workqueue which can be used by master * drivers if they need to postpone operations that need to take place * in a thread context. Typical examples are Hot Join processing which @@ -533,6 +553,7 @@ struct i3c_master_controller { struct list_head i2c; } boardinfo; struct i3c_bus bus; + u8 addr_method; struct workqueue_struct *wq; }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c30d12e16473d..b74228f9f1ce0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -703,6 +703,12 @@ struct iommu_ops { struct iommu_domain *parent_domain, const struct iommu_user_data *user_data); + /* Per group IOMMU features */ + int (*get_group_qos_params)(struct iommu_group *group, u16 *partition, + u8 *perf_mon_grp); + int (*set_group_qos_params)(struct iommu_group *group, u16 partition, + u8 perf_mon_grp); + const struct iommu_domain_ops *default_domain_ops; struct module *owner; struct iommu_domain *identity_domain; @@ -903,12 +909,15 @@ static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) { return iommu_paging_domain_alloc_flags(dev, 0); } +struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj); +extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); +extern struct iommu_domain *iommu_get_domain_for_group(struct iommu_group *group); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); @@ -959,6 +968,7 @@ extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); +struct kset *iommu_get_group_kset(void); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); int iommu_set_pgtable_quirks(struct iommu_domain *domain, @@ -1184,6 +1194,10 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); +int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp); +int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -1211,6 +1225,16 @@ static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) return ERR_PTR(-ENODEV); } +static inline struct iommu_group *iommu_group_get_from_kobj(struct kobject *group_kobj) +{ + return NULL; +} + +static inline struct iommu_group *iommu_group_get_by_id(int id) +{ + return NULL; +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } @@ -1369,6 +1393,11 @@ static inline int iommu_group_id(struct iommu_group *group) return -ENODEV; } +static inline struct kset *iommu_get_group_kset(void) +{ 
+ return NULL; +} + static inline int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks) { @@ -1507,6 +1536,17 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) } static inline void iommu_free_global_pasid(ioasid_t pasid) {} +static inline int iommu_group_set_qos_params(struct iommu_group *group, + u16 partition, u8 perf_mon_grp) +{ + return -ENODEV; +} + +static inline int iommu_group_get_qos_params(struct iommu_group *group, + u16 *partition, u8 *perf_mon_grp) +{ + return -ENODEV; +} #endif /* CONFIG_IOMMU_API */ #ifdef CONFIG_IRQ_MSI_IOMMU diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c8219505a79f9..514e4cf1f0f54 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -200,6 +200,8 @@ static inline const struct kobj_type *get_ktype(const struct kobject *kobj) struct kobject *kset_find_obj(struct kset *, const char *); +struct kobject *kset_get_next_obj(struct kset *kset, struct kobject *prev); + /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; /* The global /sys/kernel/mm/ kobject for people to chain off of */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 40eb70ccb09d5..2a770e7c6ab1e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -126,6 +126,7 @@ struct mem_section; #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 #define KSM_CALLBACK_PRI 100 +#define RESCTRL_CALLBACK_PRI 100 #ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 23f038a162319..acc5ac1e92491 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -334,4 +334,10 @@ extern int arch_create_linear_mapping(int nid, u64 start, u64 size, void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ +#if defined(CONFIG_LOCKDEP) && defined(CONFIG_MEMORY_HOTPLUG) +void lockdep_assert_mems_held(void); +#else +static inline void lockdep_assert_mems_held(void) { } +#endif + #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 17e244200d2c4..f06af598122f6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -51,7 +51,7 @@ PCI_STATUS_PARITY) /* Number of reset methods used in pci_reset_fn_methods array in pci.c */ -#define PCI_NUM_RESET_METHODS 8 +#define PCI_NUM_RESET_METHODS 9 #define PCI_RESET_PROBE true #define PCI_RESET_DO_RESET false diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 93c9a26492fcf..2d39322c40c43 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -119,6 +119,7 @@ struct arm_pmu { /* PMUv3 only */ int pmuver; + bool has_smt; u64 reg_pmmir; u64 reg_brbidr; #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 074754c23d330..23a30ada2d4cf 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -232,6 +232,7 @@ extern int platform_device_add_data(struct platform_device *pdev, extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); +DEFINE_FREE(platform_device_put, struct platform_device *, if (_T) platform_device_put(_T)) struct platform_driver { int (*probe)(struct platform_device *); diff --git a/include/linux/resctrl.h 
b/include/linux/resctrl.h index 6fb4894b8cfd1..055f27045b4da 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -3,6 +3,7 @@ #define _RESCTRL_H #include +#include #include #include #include @@ -53,6 +54,8 @@ enum resctrl_res_level { RDT_RESOURCE_L2, RDT_RESOURCE_MBA, RDT_RESOURCE_SMBA, + RDT_RESOURCE_L3_MAX, + RDT_RESOURCE_L2_MAX, /* Must be the last */ RDT_NUM_RESOURCES, @@ -135,7 +138,7 @@ enum resctrl_domain_type { */ struct rdt_domain_hdr { struct list_head list; - int id; + u32 id; enum resctrl_domain_type type; struct cpumask cpu_mask; }; @@ -156,28 +159,43 @@ struct rdt_ctrl_domain { u32 *mbps_val; }; +/** + * struct mbm_cntr_cfg - Assignable counter configuration. + * @evtid: MBM event to which the counter is assigned. Only valid + * if @rdtgroup is not NULL. + * @rdtgrp: resctrl group assigned to the counter. NULL if the + * counter is free. + */ +struct mbm_cntr_cfg { + enum resctrl_event_id evtid; + struct rdtgroup *rdtgrp; +}; + /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold - * @mbm_total: saved state for MBM total bandwidth - * @mbm_local: saved state for MBM local bandwidth + * @mbm_states: Per-event pointer to the MBM event's saved state. + * An MBM event's state is an array of struct mbm_state + * indexed by RMID on x86 or combined CLOSID, RMID on Arm. * @mbm_over: worker to periodically read MBM h/w counters * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters + * @cntr_cfg: array of assignable counters' configuration (indexed + * by counter ID) */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; - struct mbm_state *mbm_total; - struct mbm_state *mbm_local; + struct mbm_state *mbm_states[QOS_NUM_L3_MBM_EVENTS]; struct delayed_work mbm_over; struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; + struct mbm_cntr_cfg *cntr_cfg; }; /** @@ -219,22 +237,28 @@ enum membw_throttle_mode { * @min_bw: Minimum memory bandwidth percentage user can request * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated - * @delay_linear: True if memory B/W delay is in linear scale - * @arch_needs_linear: True if we can't configure non-linear resources - * @throttle_mode: Bandwidth throttling mode when threads request - * different memory bandwidths - * @mba_sc: True if MBA software controller(mba_sc) is enabled - * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct resctrl_membw { u32 min_bw; u32 max_bw; u32 bw_gran; - u32 delay_linear; - bool arch_needs_linear; - enum membw_throttle_mode throttle_mode; +}; + +/** + * struct resctrl_mba - Resource properties that are specific to the MBA resource + * @mba_sc: True if MBA software controller(mba_sc) is enabled + * @mb_map: Mapping of memory B/W percentage to memory B/W delay + * @delay_linear: True if control is in linear scale + * @arch_needs_linear: True if we can't configure non-linear resources + * @throttle_mode: Mode when threads request different control values + */ +struct resctrl_mba { bool mba_sc; u32 *mb_map; + bool delay_linear; + bool arch_needs_linear; + enum membw_throttle_mode throttle_mode; + }; struct resctrl_schema; @@ -248,11 +272,33 @@ enum resctrl_scope { 
/** * enum resctrl_schema_fmt - The format user-space provides for a schema. * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. - * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + * @RESCTRL_SCHEMA_PERCENT: The schema is a percentage. + * @RESCTRL_SCHEMA_MBPS: The schema is an MBps value. + * @RESCTRL_SCHEMA__AMD_MBA: The schema value is MBA for AMD platforms. */ enum resctrl_schema_fmt { RESCTRL_SCHEMA_BITMAP, - RESCTRL_SCHEMA_RANGE, + RESCTRL_SCHEMA_PERCENT, + RESCTRL_SCHEMA_MBPS, + RESCTRL_SCHEMA__AMD_MBA, +}; + +/** + * struct resctrl_mon - Monitoring related data of a resctrl resource. + * @num_rmid: Number of RMIDs available. + * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth + * monitoring events can be configured. + * @num_mbm_cntrs: Number of assignable counters. + * @mbm_cntr_assignable:Is system capable of supporting counter assignment? + * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM + * events of monitor groups created via mkdir. + */ +struct resctrl_mon { + int num_rmid; + unsigned int mbm_cfg_mask; + int num_mbm_cntrs; + bool mbm_cntr_assignable; + bool mbm_assign_on_mkdir; }; /** @@ -260,15 +306,16 @@ enum resctrl_schema_fmt { * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine - * @num_rmid: Number of RMIDs available * @ctrl_scope: Scope of this resource for control functions * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. + * @mon: Monitoring related data. * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource + * @mba: Properties of the MBA resource * @name: Name to use in "schemata" file. - * @schema_fmt: Which format string and parser is used for this schema. + * @schema_fmt: Which format control parameters should be in for this resource. * @evt_list: List of monitoring events * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth * monitoring events can be configured. @@ -278,17 +325,16 @@ struct rdt_resource { int rid; bool alloc_capable; bool mon_capable; - int num_rmid; enum resctrl_scope ctrl_scope; enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; + struct resctrl_mon mon; + struct resctrl_mba mba; struct list_head ctrl_domains; struct list_head mon_domains; char *name; enum resctrl_schema_fmt schema_fmt; - struct list_head evt_list; - unsigned int mbm_cfg_mask; bool cdp_capable; }; @@ -305,9 +351,12 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. * @fmt_str: Format string to show domain value. + * @schema_fmt: Which format string and parser is used for this schema. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. + * @membw: The properties of the schema, which may differ from the format + * that was specified by the resource. * @num_closid: The number of closid that can be used with this schema. When * features like CDP are enabled, this will be lower than the * hardware supports for the resource. 
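To make the split above concrete, here is a minimal sketch (not part of this series; the function name and the values are invented) of how an architecture might populate the new resctrl_membw/resctrl_mba/resctrl_mon groupings for an MBA-style resource:

#include <linux/resctrl.h>

/* Illustrative only: fill in the split-out property structs. */
static void example_init_mba_resource(struct rdt_resource *r)
{
	/* Generic bandwidth range stays in struct resctrl_membw. */
	r->membw.min_bw  = 10;
	r->membw.max_bw  = 100;
	r->membw.bw_gran = 10;

	/* MBA-only details move to struct resctrl_mba. */
	r->mba.delay_linear      = true;
	r->mba.arch_needs_linear = true;
	r->mba.throttle_mode     = THREAD_THROTTLE_MAX;

	/* Monitoring properties move to struct resctrl_mon. */
	r->mon.num_rmid            = 256;
	r->mon.mbm_cntr_assignable = false;

	r->schema_fmt = RESCTRL_SCHEMA_PERCENT;
}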
@@ -316,8 +365,10 @@ struct resctrl_schema { struct list_head list; char name[8]; const char *fmt_str; + enum resctrl_schema_fmt schema_fmt; enum resctrl_conf_type conf_type; struct rdt_resource *res; + struct resctrl_membw membw; u32 num_closid; }; @@ -351,29 +402,71 @@ struct resctrl_mon_config_info { void resctrl_arch_sync_cpu_closid_rmid(void *info); /** - * resctrl_get_default_ctrl() - Return the default control value for this - * resource. - * @r: The resource whose default control type is queried. + * resctrl_get_resource_default_ctrl() - Return the default control value for + * this resource. + * @r: The resource whose default control value is queried. */ -static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +static inline u32 resctrl_get_resource_default_ctrl(struct rdt_resource *r) { switch (r->schema_fmt) { case RESCTRL_SCHEMA_BITMAP: return BIT_MASK(r->cache.cbm_len) - 1; - case RESCTRL_SCHEMA_RANGE: + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: return r->membw.max_bw; } return WARN_ON_ONCE(1); } +/** + * resctrl_get_schema_default_ctrl() - Return the default control value for + * this schema. + * @s: The schema whose default control value is queried. + */ +static inline u32 resctrl_get_schema_default_ctrl(struct resctrl_schema *s) +{ + switch (s->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return resctrl_get_resource_default_ctrl(s->res); + case RESCTRL_SCHEMA_PERCENT: + case RESCTRL_SCHEMA_MBPS: + case RESCTRL_SCHEMA__AMD_MBA: + return s->membw.max_bw; + } + + return WARN_ON_ONCE(1); +} + /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +void resctrl_enable_mon_event(enum resctrl_event_id eventid); + +bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); + bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +static inline bool resctrl_is_mbm_event(enum resctrl_event_id eventid) +{ + return (eventid >= QOS_L3_MBM_TOTAL_EVENT_ID && + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + +u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id eventid); + +/* Iterate over all memory bandwidth events */ +#define for_each_mbm_event_id(eventid) \ + for (eventid = QOS_L3_MBM_TOTAL_EVENT_ID; \ + eventid <= QOS_L3_MBM_LOCAL_EVENT_ID; eventid++) + +/* Iterate over memory bandwidth arrays in domain structures */ +#define for_each_mbm_idx(idx) \ + for (idx = 0; idx < QOS_NUM_L3_MBM_EVENTS; idx++) + /** * resctrl_arch_mon_event_config_write() - Write the config for an event. * @config_info: struct resctrl_mon_config_info describing the resource, domain @@ -416,6 +509,28 @@ static inline u32 resctrl_get_config_index(u32 closid, bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); +/** + * resctrl_arch_mbm_cntr_assign_enabled() - Check if MBM counter assignment + * mode is enabled. + * @r: Pointer to the resource structure. + * + * Return: + * true if the assignment mode is enabled, false otherwise. + */ +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r); + +/** + * resctrl_arch_mbm_cntr_assign_set() - Configure the MBM counter assignment mode. + * @r: Pointer to the resource structure. + * @enable: Set to true to enable, false to disable the assignment mode. + * + * Return: + * 0 on success, < 0 on error. 
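+ *
+ * An illustrative caller (not taken from this series; error handling elided)
+ * that only turns the mode on when the hardware advertises assignable
+ * counters might do:
+ *
+ *	if (r->mon.mbm_cntr_assignable && !resctrl_arch_mbm_cntr_assign_enabled(r))
+ *		ret = resctrl_arch_mbm_cntr_assign_set(r, true);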
+ */ +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable); + +u32 resctrl_arch_round_bw(u32 val, const struct resctrl_schema *s); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -528,12 +643,70 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * */ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); +/** + * resctrl_arch_config_cntr() - Configure the counter with its new RMID + * and event details. + * @r: Resource structure. + * @d: The domain in which counter with ID @cntr_id should be configured. + * @evtid: Monitoring event type (e.g., QOS_L3_MBM_TOTAL_EVENT_ID + * or QOS_L3_MBM_LOCAL_EVENT_ID). + * @rmid: RMID. + * @closid: CLOSID. + * @cntr_id: Counter ID to configure. + * @assign: True to assign the counter or update an existing assignment, + * false to unassign the counter. + * + * This can be called from any CPU. + */ +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign); + +/** + * resctrl_arch_cntr_read() - Read the event data corresponding to the counter ID + * assigned to the RMID, event pair for this resource + * and domain. + * @r: Resource that the counter should be read from. + * @d: Domain that the counter should be read from. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to read. + * @eventid: The MBM event to which @cntr_id is assigned. + * @val: Result of the counter read in bytes. + * + * Called on a CPU that belongs to domain @d when "mbm_event" mode is enabled. + * Called from a non-migrateable process context via smp_call_on_cpu() unless all + * CPUs are nohz_full, in which case it is called via IPI (smp_call_function_any()). + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val); + +/** + * resctrl_arch_reset_cntr() - Reset any private state associated with counter ID. + * @r: The domain's resource. + * @d: The counter ID's domain. + * @closid: CLOSID that matches the RMID. + * @rmid: The RMID to which @cntr_id is assigned. + * @cntr_id: The counter to reset. + * @eventid: The MBM event to which @cntr_id is assigned. + * + * This can be called from any CPU. 
+ */ +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; int resctrl_init(void); void resctrl_exit(void); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK u64 resctrl_arch_get_prefetch_disable_bits(void); int resctrl_arch_pseudo_lock_fn(void *_plr); @@ -547,4 +720,30 @@ static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } #endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ + +/* When supported, the architecture must implement these */ +#ifdef CONFIG_RESCTRL_IOMMU +int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid); +bool resctrl_arch_match_iommu_closid(struct iommu_group *group, u32 closid); +bool resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, u32 closid, + u32 rmid); +#else +static inline int resctrl_arch_set_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + return -EOPNOTSUPP; +} +static inline bool resctrl_arch_match_iommu_closid(struct iommu_group *group, + u32 closid) +{ + return false; +} +static inline bool +resctrl_arch_match_iommu_closid_rmid(struct iommu_group *group, + u32 closid, u32 rmid) +{ + return false; +} +#endif /* CONFIG_RESCTRL_IOMMU */ #endif /* _RESCTRL_H */ diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index a25fb9c4070d3..acfe07860b346 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -34,11 +34,18 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -/* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. - */ +/* Number of memory transactions that an MBM event can be configured with */ +#define NUM_MBM_TRANSACTIONS 7 + +/* Event IDs */ enum resctrl_event_id { + /* Must match value of first event below */ + QOS_FIRST_EVENT = 0x01, + + /* + * These values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, @@ -47,4 +54,7 @@ enum resctrl_event_id { QOS_NUM_EVENTS, }; +#define QOS_NUM_L3_MBM_EVENTS (QOS_L3_MBM_LOCAL_EVENT_ID - QOS_L3_MBM_TOTAL_EVENT_ID + 1) +#define MBM_STATE_IDX(evt) ((evt) - QOS_L3_MBM_TOTAL_EVENT_ID) + #endif /* __LINUX_RESCTRL_TYPES_H */ diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 688466a0e8162..33989a689e6fc 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -877,6 +877,17 @@ struct scmi_notify_ops { struct notifier_block *nb); }; +/** + * struct scmi_mpam_proto_ops - operations provided by SCMI MPAM Protocol + * + * @mpam_transfer_buf: transfer an SCMI MPAM message to the agent + */ +struct scmi_mpam_proto_ops { + int (*mpam_transfer_buf)(const struct scmi_protocol_handle *ph, + u8 msg_id, void *msg_buf, size_t msg_len, + u32 *ret_val); +}; + /** * struct scmi_handle - Handle returned to ARM SCMI clients for usage. 
* @@ -926,6 +937,7 @@ enum scmi_std_protocol { SCMI_PROTOCOL_VOLTAGE = 0x17, SCMI_PROTOCOL_POWERCAP = 0x18, SCMI_PROTOCOL_PINCTRL = 0x19, + SCMI_PROTOCOL_MPAM = 0x1a, }; enum scmi_system_events { diff --git a/lib/kobject.c b/lib/kobject.c index abe5f5b856ceb..b1fdd6ad60f1b 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -920,6 +920,27 @@ struct kobject *kset_find_obj(struct kset *kset, const char *name) } EXPORT_SYMBOL_GPL(kset_find_obj); +struct kobject *kset_get_next_obj(struct kset *kset, struct kobject *prev) +{ + struct kobject *k; + + spin_lock(&kset->list_lock); + + if (!prev) + k = list_first_entry_or_null(&kset->list, typeof(*k), entry); + else + k = list_next_entry(prev, entry); + + if (list_entry_is_head(k, &kset->list, entry)) + k = NULL; + + kobject_get(k); + spin_unlock(&kset->list_lock); + kobject_put(prev); + + return k; +} + static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 74318c7877156..89ec5ed8c488b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -218,6 +218,17 @@ void put_online_mems(void) percpu_up_read(&mem_hotplug_lock); } +#ifdef CONFIG_LOCKDEP +void lockdep_assert_mems_held(void) +{ + /* See lockdep_assert_cpus_held() */ + if (system_state < SYSTEM_RUNNING) + return; + + percpu_rwsem_assert_held(&mem_hotplug_lock); +} +#endif + bool movable_node_enabled = false; static int mhp_default_online_type = -1; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 6a25cca5636f7..f4dceecf7e335 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -726,6 +726,45 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->reset = mock_decoder_reset; } +static void size_zero_mock_decoder_ep(struct cxl_decoder *cxld, u64 base) +{ + struct cxl_endpoint_decoder *cxled; + + cxled = to_cxl_endpoint_decoder(&cxld->dev); + cxld->hpa_range = (struct range){ + .start = base, + .end = base - 1, /* Size 0 */ + }; + + cxld->interleave_ways = 2; + cxld->interleave_granularity = 4096; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; + cxld->flags = CXL_DECODER_F_ENABLE; + cxled->state = CXL_DECODER_STATE_AUTO; + cxld->commit = mock_decoder_commit; + cxld->reset = mock_decoder_reset; +} + +static void size_zero_mock_decoder_sw(struct device *dev, u64 base, int i) +{ + struct cxl_switch_decoder *cxlsd; + struct cxl_decoder *cxld; + + cxlsd = to_cxl_switch_decoder(dev); + cxld = &cxlsd->cxld; + cxld->flags = CXL_DECODER_F_ENABLE; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; + if (i == 0) + cxld->interleave_ways = 2; + else + cxld->interleave_ways = 1; + cxld->interleave_granularity = 4096; + cxld->hpa_range = (struct range) { + .start = base, + .end = base - 1, /* Size 0 */ + }; +} + static int first_decoder(struct device *dev, const void *data) { struct cxl_decoder *cxld; @@ -738,6 +777,30 @@ static int first_decoder(struct device *dev, const void *data) return 0; } +static int second_decoder(struct device *dev, const void *data) +{ + struct cxl_decoder *cxld; + + if (!is_switch_decoder(dev)) + return 0; + cxld = to_cxl_decoder(dev); + if (cxld->id == 1) + return 1; + return 0; +} + +static int third_decoder(struct device *dev, const void *data) +{ + struct cxl_decoder *cxld; + + if (!is_switch_decoder(dev)) + return 0; + cxld = to_cxl_decoder(dev); + if (cxld->id == 2) + return 1; + return 0; +} + static void mock_init_hdm_decoder(struct cxl_decoder *cxld) { struct acpi_cedt_cfmws *window = 
mock_cfmws[0]; @@ -750,7 +813,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) struct cxl_dport *dport; struct device *dev; bool hb0 = false; - u64 base; + u64 base = window->base_hpa; int i; if (is_endpoint_decoder(&cxld->dev)) { @@ -774,6 +837,20 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) port = cxled_to_port(cxled); } + /* + * Decoders 1 and 2 of the endpoint under host bridge 0 are enabled as zero-sized + * decoders. Ideally the parent switch uport decoder would be confirmed as enabled + * before enabling these zero-sized decoders, but enabling them unconditionally is + * harmless. + */ + if (hb0 && (cxld->id == 1 || cxld->id == 2)) { + port = to_cxl_port(cxld->dev.parent); + size_zero_mock_decoder_ep(cxld, base); + /* Commit the zero-sized decoder */ + port->commit_end = cxld->id; + return; + } + /* * The first decoder on the first 2 devices on the first switch * attached to host-bridge0 mock a fake / static RAM region. All @@ -787,7 +864,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) return; } - base = window->base_hpa; cxld->hpa_range = (struct range) { .start = base, .end = base + size - 1, @@ -845,6 +921,22 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) .end = base + size - 1, }; put_device(dev); + + /* Also enable the next two decoders and make them zero-sized */ + dev = device_find_child(&iter->dev, NULL, second_decoder); + WARN_ON(!dev); + if (dev) { + size_zero_mock_decoder_sw(dev, base, i); + iter->commit_end = 1; + put_device(dev); + } + dev = device_find_child(&iter->dev, NULL, third_decoder); + WARN_ON(!dev); + if (dev) { + size_zero_mock_decoder_sw(dev, base, i); + iter->commit_end = 2; + put_device(dev); + } } }
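(A usage note on the kset_get_next_obj() helper added in the lib/kobject.c hunk above: it returns the next kset member with a reference held and drops the reference on @prev, so a caller can iterate a kset without explicit kobject_get()/kobject_put() pairing. The sketch below is illustrative only; the example function and its pr_info() body are assumptions, not part of the patch.)

	#include <linux/kobject.h>
	#include <linux/printk.h>

	/*
	 * Illustrative walk over every kobject in a kset using kset_get_next_obj().
	 * Each call takes a reference on the returned entry and puts the previous
	 * one, so no manual get/put pairing is needed inside the loop.
	 */
	static void example_walk_kset(struct kset *kset)
	{
		struct kobject *k = NULL;

		while ((k = kset_get_next_obj(kset, k)))
			pr_info("kset member: %s\n", kobject_name(k));

		/* k is NULL once the list is exhausted, so nothing is left to put. */
	}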